diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -8,7 +8,7 @@ "family": "Indo-European", "flores_path": "eng_Latn", "fleurs_tag": "en_us", - "commonvoice_hours": 2657.0, + "commonvoice_hours": 2658.0, "commonvoice_locale": "en", "in_benchmark": true }, @@ -2156,7 +2156,7 @@ "family": "Kartvelian", "flores_path": "kat_Geor", "fleurs_tag": "ka_ge", - "commonvoice_hours": 162.0, + "commonvoice_hours": 163.0, "commonvoice_locale": "ka", "in_benchmark": true }, @@ -3560,7 +3560,7 @@ "family": "Abkhaz-Adyge", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 30.0, + "commonvoice_hours": 31.0, "commonvoice_locale": "kbd", "in_benchmark": false }, @@ -6824,7 +6824,7 @@ "family": "Indo-European", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 16.0, + "commonvoice_hours": 17.0, "commonvoice_locale": "an", "in_benchmark": false }, @@ -8144,10 +8144,10 @@ "provider_name": "OpenAI", "cost": 0.6, "hf_id": null, - "creation_date": "NaT", "size": null, "type": "Commercial", - "license": null + "license": null, + "creation_date": "2024-07-18" }, { "id": "meta-llama/llama-4-maverick", @@ -8155,10 +8155,10 @@ "provider_name": "Meta", "cost": 0.85, "hf_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", - "creation_date": "2025-04-01T22:17:20+00:00", "size": 401583781376.0, "type": "Open", - "license": "Other" + "license": "Other", + "creation_date": "2025-04-01" }, { "id": "meta-llama/llama-3.3-70b-instruct", @@ -8166,116 +8166,17 @@ "provider_name": "Meta", "cost": 0.3, "hf_id": "meta-llama/Llama-3.3-70B-Instruct", - "creation_date": "2024-11-26T16:08:47+00:00", "size": 70553706496.0, "type": "Open", - "license": "Llama3.3" - }, - { - "id": "meta-llama/llama-3.1-70b-instruct", - "name": "Llama 3.1 70B Instruct", - "provider_name": "Meta", - "cost": 0.3, - "hf_id": "meta-llama/Llama-3.1-70B-Instruct", - "creation_date": "2024-07-16T16:07:46+00:00", - "size": 70553706496.0, - "type": "Open", - "license": "Llama3.1" - }, - { - "id": "meta-llama/llama-3-70b-instruct", - "name": "Llama 3 70B Instruct", - "provider_name": "Meta", - "cost": 0.4, - "hf_id": "meta-llama/Meta-Llama-3-70B-Instruct", - "creation_date": "2024-04-17T09:34:54+00:00", - "size": 70553706496.0, - "type": "Open", - "license": "Llama3" - }, - { - "id": "mistralai/mistral-small-3.1-24b-instruct", - "name": "Mistral Small 3.1 24B", - "provider_name": "Mistral", - "cost": 0.3, - "hf_id": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "creation_date": "2025-03-11T17:04:58+00:00", - "size": 24011361280.0, - "type": "Open", - "license": "Apache 2.0" - }, - { - "id": "google/gemini-2.0-flash-001", - "name": "Gemini 2.0 Flash", - "provider_name": "Google", - "cost": 0.4, - "hf_id": null, - "creation_date": "NaT", - "size": null, - "type": "Commercial", - "license": null - }, - { - "id": "google/gemma-3-27b-it", - "name": "Gemma 3 27B", - "provider_name": "Google", - "cost": 0.2, - "hf_id": "google/gemma-3-27b-it", - "creation_date": "2025-03-01T19:10:19+00:00", - "size": 27432406640.0, - "type": "Open", - "license": "Gemma" - }, - { - "id": "qwen/qwq-32b", - "name": "QwQ 32B", - "provider_name": "Qwen", - "cost": 0.2, - "hf_id": "Qwen/QwQ-32B", - "creation_date": "2025-03-05T14:16:59+00:00", - "size": 32763876352.0, - "type": "Open", - "license": "Apache 2.0" - }, - { - "id": "deepseek/deepseek-chat-v3-0324", - "name": "DeepSeek V3 0324", - "provider_name": "DeepSeek", - "cost": 1.1, - "hf_id": "deepseek-ai/DeepSeek-V3-0324", - "creation_date": "2025-03-24T09:28:22+00:00", - "size": 684531386000.0, - "type": "Open", - "license": "Mit" - }, - { - "id": "microsoft/phi-4-multimodal-instruct", - "name": "Phi 4 Multimodal Instruct", - "provider_name": "Microsoft", - "cost": 0.1, - "hf_id": "microsoft/Phi-4-multimodal-instruct", - "creation_date": "2025-02-24T22:33:32+00:00", - "size": 5574460384.0, - "type": "Open", - "license": "Mit" - }, - { - "id": "amazon/nova-micro-v1", - "name": "Nova Micro 1.0", - "provider_name": "Amazon", - "cost": 0.14, - "hf_id": null, - "creation_date": "NaT", - "size": null, - "type": "Commercial", - "license": null + "license": "Llama3.3", + "creation_date": "2024-11-26" } ], "scores": [ { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.5679608237702286, "sentence_nr": 0 @@ -8283,7 +8184,7 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.746881923400435, "sentence_nr": 0 @@ -8291,7 +8192,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.5592169701176339, "sentence_nr": 0 @@ -8299,7 +8200,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.7187320759449207, "sentence_nr": 0 @@ -8307,7 +8208,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.4438455475739657, "sentence_nr": 0 @@ -8315,159 +8216,15 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.6320800718582147, "sentence_nr": 0 }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5894973558751632, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7562097956860054, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3846086976522069, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5835344719191324, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3472596783998825, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5880210095195896, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5820808184424484, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.73788733854976, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5617561349997696, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7132694856647042, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2963216580569375, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5101500486835966, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6303545030576861, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.77785134764153, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15317719477157257, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38800976493585004, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6001453932849357, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.762029391170019, - "sentence_nr": 0 - }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.30676942927198475, "sentence_nr": 0 @@ -8475,7 +8232,7 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.4968492831219663, "sentence_nr": 0 @@ -8483,7 +8240,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.3742128962272385, "sentence_nr": 0 @@ -8491,7 +8248,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.5924994297544066, "sentence_nr": 0 @@ -8499,7 +8256,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.32063971770635635, "sentence_nr": 0 @@ -8507,159 +8264,15 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.5206258401513325, "sentence_nr": 0 }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39086127104761287, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6239956806265569, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3020679767949182, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5246291817407542, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2516114673955893, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5164808837319497, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2920008662633279, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47119207959541226, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4273817965049865, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6016204186733703, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3972267643943283, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5952617863931118, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2777551012631926, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49423240120783246, - "sentence_nr": 0 - }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.9878765474230741, "sentence_nr": 0 @@ -8667,7 +8280,7 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.9958930217841712, "sentence_nr": 0 @@ -8675,7 +8288,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.8780634320789833, "sentence_nr": 0 @@ -8683,7 +8296,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.926946700115022, "sentence_nr": 0 @@ -8691,7 +8304,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.7964573357809173, "sentence_nr": 0 @@ -8699,159 +8312,15 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.8458636471716781, "sentence_nr": 0 }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.9452996322890763, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.9463396364218181, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.9144679601133087, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.968636887477685, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6537803976048806, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7742226743967544, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.738238064391125, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8637738769684485, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.4226799078177409, "sentence_nr": 0 @@ -8859,7 +8328,7 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.5651672709988255, "sentence_nr": 0 @@ -8867,7 +8336,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.32406433662077544, "sentence_nr": 0 @@ -8875,7 +8344,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.5243586266504104, "sentence_nr": 0 @@ -8883,7 +8352,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.34633672321253084, "sentence_nr": 0 @@ -8891,209258 +8360,76946 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", "score": 0.5378805625051344, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3852076286218103, + "score": 0.5077888484472814, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5629822759777402, + "score": 0.6493197366069867, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.26021014514167856, + "score": 0.4318843329340524, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.4820043660869366, + "score": 0.6011096108554106, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2502214193201532, + "score": 0.3582301850807646, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.49819657249183386, + "score": 0.5380305837807603, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4224822177894696, + "score": 0.3732667150787326, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5623149154312317, + "score": 0.5674650482249737, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.441464946158803, + "score": 0.4641883721676649, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6003092613714627, + "score": 0.6403267149729506, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.34734422615832194, + "score": 0.300740577257699, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5262645092345396, + "score": 0.5272774705181614, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3232734746803988, + "score": 0.3576035471132581, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5256417654956012, + "score": 0.5426399702952437, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.13576358182705253, + "score": 0.4422044705926463, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.41529193531769876, + "score": 0.6089032707320831, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.24926331918525627, + "score": 0.3099603853356145, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4599756430080559, + "score": 0.5209233176748354, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.5077888484472814, + "score": 0.33210944907163426, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6493197366069867, + "score": 0.5289420578289948, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.4318843329340524, + "score": 0.4331131003868224, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6011096108554106, + "score": 0.5898969623074624, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3582301850807646, + "score": 0.35580399268816465, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5380305837807603, + "score": 0.5392592206305507, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.44175593938589236, + "score": 0.4475435253337274, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6111096352841461, + "score": 0.5956867226653717, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3905612192964119, + "score": 0.5274220384037692, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5861956606716949, + "score": 0.6765588140322357, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.388275825650142, + "score": 0.39317381456022266, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5946895227088745, + "score": 0.6026058740561834, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.5447134963471945, + "score": 0.4166560818400039, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.7261993659965442, + "score": 0.6515522498665886, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.47840604738578085, + "score": 0.42734667499155, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6297473901472479, + "score": 0.6397906518456509, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.48930936408255293, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.699085629239476, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.5009380663759289, + "score": 0.37224644590020084, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6679481474132949, + "score": 0.5716341952568125, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.2087397501881324, + "score": 0.36921945860245514, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.39410069470509135, + "score": 0.5602656572610939, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.4640742081615844, + "score": 0.3963410285961713, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6372680189651158, + "score": 0.613166190285915, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3732667150787326, + "score": 0.3660623361610902, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5674650482249737, + "score": 0.5840165124966731, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.4641883721676649, + "score": 0.4404175157492415, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.6403267149729506, + "score": 0.6499400950194552, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.300740577257699, + "score": 0.44294247711132617, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5272774705181614, + "score": 0.5915660675216782, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.4061066499716187, + "score": 0.500516497336299, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6080346530552228, + "score": 0.7019407549121803, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.31520390441828733, + "score": 0.40306183496110326, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5666753970394321, + "score": 0.6065077241830509, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.2426576141982896, + "score": 0.3756985486608933, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4681164293806726, + "score": 0.5991443770283833, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.4740970660161798, + "score": 0.4824471894538444, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6509090646705696, + "score": 0.6756807439055712, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3725907668893922, + "score": 0.4892530408936975, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5838909337906717, + "score": 0.6697286007212407, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.1229583779881281, + "score": 0.5009456904181451, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.3267617054992069, + "score": 0.6893719644090858, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.46418585410212687, + "score": 0.23363375253301555, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6257813924169782, + "score": 0.4539275409654266, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.21346660402255854, + "score": 0.498687604330117, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4749873824263006, + "score": 0.6495577010231699, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.3284656616594502, + "score": 0.18273944860385094, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5314089060682492, + "score": 0.44261865187418153, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3576035471132581, + "score": 0.30519601919508343, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5426399702952437, + "score": 0.48440897375540304, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4422044705926463, + "score": 0.3897372020625521, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6089032707320831, + "score": 0.5520780806464591, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3099603853356145, + "score": 0.2153742037697241, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5209233176748354, + "score": 0.4581737688885401, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.30939216619448856, + "score": 0.34655442187135127, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5208328629222005, + "score": 0.6023567722379627, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2417404985264926, + "score": 0.2775751476798985, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4540589962283635, + "score": 0.5467407840471017, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2840596414449913, + "score": 0.3372953649368346, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4892722276483434, + "score": 0.5482505380106469, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.29556470672244106, + "score": 0.2453787991485662, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5006575554615639, + "score": 0.4099668999237371, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.344338817815182, + "score": 0.2988083057066004, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5355920179313903, + "score": 0.4973008562914265, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.28528905353056333, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.4885812318466243, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.28716979381420105, + "score": 0.3091536050099401, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5171262478660463, + "score": 0.504963808447426, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.35120509380099896, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.05448112815049329, + "score": 0.5127991322787522, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3360010226928493, + "score": 0.2935204022158406, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5216531073745614, + "score": 0.4867597973247361, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.33210944907163426, + "score": 0.41613344165345995, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5289420578289948, + "score": 0.5740077532098984, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4331131003868224, + "score": 0.49132868804528823, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5898969623074624, + "score": 0.6524450166860349, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.35580399268816465, + "score": 0.2929684584911775, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5392592206305507, + "score": 0.5038324436049059, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.40724702386633355, + "score": 0.49428478171113605, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5650294312072152, + "score": 0.6360862650323953, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.3741026207881868, + "score": 0.27106784138456536, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5834523243646894, + "score": 0.458287745564531, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.3909683536530208, + "score": 0.4034224234291925, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5861999156017297, + "score": 0.5736798834726872, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.42514139917377647, + "score": 0.3109058809229358, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5934850967299605, + "score": 0.5045951829816013, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4100880948326119, + "score": 0.2822871796543221, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5748650910980349, + "score": 0.5208915029538709, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.1077205146963877, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.428338145564396, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.553414625382002, + "score": 0.2556346494160282, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.7074940030211, + "score": 0.4538035440310274, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.19250412598108757, + "score": 0.4102082155233312, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4448372401459185, + "score": 0.5474039587505726, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3109333640704356, + "score": 0.22327767951697297, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5072760587388273, + "score": 0.4063556880747369, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.4475435253337274, + "score": 0.2392120773016637, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5956867226653717, + "score": 0.440445343487272, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.5274220384037692, + "score": 0.22424453668984448, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6765588140322357, + "score": 0.41637444107955873, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.39317381456022266, + "score": 0.2572733200413211, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6026058740561834, + "score": 0.4520014138562526, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.3229030611977504, + "score": 0.40487199173556226, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5136703373168134, + "score": 0.5637204315528265, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.3113772787307771, + "score": 0.40165053057541866, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5344680037267059, + "score": 0.5837756195280097, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.35103160282487145, + "score": 0.40311197004738203, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5432217848942439, + "score": 0.5788525108956781, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.451294475352144, + "score": 0.5108628809804742, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6017904208103514, + "score": 0.6929396211173784, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4831574055451935, + "score": 0.49872195941208947, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6238976883927624, + "score": 0.6907054265096231, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.45313578977486535, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6160993561903745, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3667951090093586, + "score": 0.3335262554878992, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.574382729364071, + "score": 0.5258955094447381, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.0895824671662166, + "score": 0.3411271681324882, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.24679712992918926, + "score": 0.5323123267352375, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3511508047578372, + "score": 0.2651736858432996, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.502364219831564, + "score": 0.4491383344282561, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.4166560818400039, + "score": 0.37742688647401873, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6515522498665886, + "score": 0.5674314405993244, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.42734667499155, + "score": 0.24828430598240606, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6397906518456509, + "score": 0.5078550622606068, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.48930936408255293, + "score": 0.34545319957597864, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.699085629239476, + "score": 0.5727052860304503, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.49517040114696814, + "score": 0.339818403012025, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.672650019344124, + "score": 0.5156759219303986, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.47426274497383164, + "score": 0.4381699512774638, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6547171931962555, + "score": 0.5887410281104106, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.44697138732796604, + "score": 0.38968867962607934, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6533822343227146, + "score": 0.5581403039390647, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.46782777727347913, + "score": 0.12843096555088776, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6774531400702429, + "score": 0.3356201430079791, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.5803563388252858, + "score": 0.17727473966694943, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.7397216312303552, + "score": 0.3475071694578125, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.05034135169161612, + "score": 0.20451416608402828, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.25001156386121903, + "score": 0.38185285396290036, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.45495679780282583, + "score": 0.3536676112393946, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.699735222419999, + "score": 0.5252283198216768, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0025767494884759577, + "score": 0.5494025263062274, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.022849655955591117, + "score": 0.6860161543947312, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.49179307081132717, + "score": 0.2636405082687104, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6798382116037067, + "score": 0.5072009470421238, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.37224644590020084, + "score": 0.2348553453946444, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5716341952568125, + "score": 0.4891959123914518, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.36921945860245514, + "score": 0.4222656487192343, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5602656572610939, + "score": 0.6421614792137705, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.3963410285961713, + "score": 0.20174173621464261, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.613166190285915, + "score": 0.5179166118048267, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.390589858528132, + "score": 0.3461114139111442, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5498108214680063, + "score": 0.5537111972654953, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.33433503990805974, + "score": 0.353179331599201, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5465517653500693, + "score": 0.5558881348090785, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2820342917142487, + "score": 0.33522833358360765, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5410881356746259, + "score": 0.534195929930943, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5030966277003764, + "score": 0.3734491516745214, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.680466091037649, + "score": 0.5499493819792871, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.4611551555069207, + "score": 0.4397415106513502, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6294324146720465, + "score": 0.5907735810868658, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.19685577478840446, + "score": 0.31308824228412185, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4551749985589161, + "score": 0.4950165423717857, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.41865363173987147, + "score": 0.3612717557348476, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6246787832833863, + "score": 0.5558371668340614, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.18474062565700086, + "score": 0.46890796443667687, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.3944315616553734, + "score": 0.6445795993451092, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.4044350002821056, + "score": 0.30704694388456133, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5937440273149751, + "score": 0.5217468869740803, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.3660623361610902, + "score": 0.41661325369918395, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5840165124966731, + "score": 0.587715140145979, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.4404175157492415, + "score": 0.3937759330018993, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.6499400950194552, + "score": 0.5961746226614889, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.44294247711132617, + "score": 0.4088174428659509, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5915660675216782, + "score": 0.5924365007019256, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.418987297037058, + "score": 0.16285971091078436, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.5748791698084322, + "score": 0.43885470392891923, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.2020983719878774, + "score": 0.37339369029886144, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4110516731487298, + "score": 0.5432112723704581, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.3495696951007327, + "score": 0.23751632756038837, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.5497542561082874, + "score": 0.4414396968637268, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.40124039505328407, + "score": 0.21791041776703116, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5502161218203272, + "score": 0.44004369960566136, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.4342485684315921, + "score": 0.46773190351581395, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5862756549666985, + "score": 0.6215065422343401, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.2799135631577256, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.32846260295658253, + "score": 0.0, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.5234687470369108, + "score": 0.2035993189596312, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.0626814220834104, + "score": 0.13511029141047634, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.2649283376124583, + "score": 0.36856155052346085, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.3504606692020456, + "score": 0.0, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.5696982139616064, + "score": 0.17779867452221493, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.500516497336299, + "score": 0.20075037608245913, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.7019407549121803, + "score": 0.4313422346882818, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.40306183496110326, + "score": 0.1874333361540541, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.6065077241830509, + "score": 0.41793053821849296, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.3756985486608933, + "score": 0.3716703379730988, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5991443770283833, + "score": 0.5800216707448408, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.3437925129268647, + "score": 0.15080316480304565, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.5496158439811546, + "score": 0.4424628792965376, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.4457795438900481, + "score": 0.2777349520199055, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.6608358312257032, + "score": 0.5312509449503231, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.4389167617930115, + "score": 0.20390263030337064, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.6283965584123504, + "score": 0.4345747929502553, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.456804148784435, + "score": 0.17913113678266074, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.6653973164805368, + "score": 0.39874837064673946, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.4544549777519972, + "score": 0.39725392028587103, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.6588011478075102, + "score": 0.5469018582137435, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.3532549308527307, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.4720654627116666, + "score": 0.3625318570509803, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.6517469394467796, + "score": 0.5303179877188419, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.13081443497119305, + "score": 0.35328951154883514, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.417733523030983, + "score": 0.5286973900062114, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.5333753443479871, + "score": 0.3816118513389601, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.6915993702819169, + "score": 0.5423632561644341, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.4824471894538444, + "score": 0.4427412215990632, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.6756807439055712, + "score": 0.6222208791908107, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.4892530408936975, + "score": 0.3709917965006414, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.6697286007212407, + "score": 0.6015654773262525, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.5009456904181451, + "score": 0.3899394268518547, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.6893719644090858, + "score": 0.594841687625348, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.5011800954736271, + "score": 0.23150269995638142, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.6882325337556615, + "score": 0.46961020207805865, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.42451113499289145, + "score": 0.23712278533862596, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.6326418045965277, + "score": 0.5116265380743877, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.47347369701789205, + "score": 0.16820174403705807, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.681786235656136, + "score": 0.4159468803310715, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.4390212047669306, + "score": 0.30495379106243414, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.6475219955365487, + "score": 0.503838460756843, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.5043550869731553, + "score": 0.5238129782835811, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.6689462373151898, + "score": 0.6833665118503387, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.28630516999083483, + "score": 0.3682745409081855, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.4859658293338903, + "score": 0.5329336102063273, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.4628513442989428, + "score": 0.09425983742608171, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.6651586361790265, + "score": 0.32871133484905984, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.1500767455847696, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.004953764861294584, + "score": 0.35247049201056063, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.48578120610890896, + "score": 0.0, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.6418878687312928, + "score": 0.25911153048152963, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.23363375253301555, + "score": 0.0, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.4539275409654266, + "score": 0.1714827465806386, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.498687604330117, + "score": 0.20461279328052204, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.6495577010231699, + "score": 0.40700264333409225, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.18273944860385094, + "score": 0.022279489478813384, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.44261865187418153, + "score": 0.1674529343985772, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.1921021633645501, + "score": 0.4763809450534613, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.41090634933708026, + "score": 0.6797378130452167, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.2019984490972421, + "score": 0.36983487280597815, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.42969616197156246, + "score": 0.5775204256764592, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.24058995161649158, + "score": 0.3926191044336021, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.4522509933949415, + "score": 0.5853598001081626, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.23560673823249806, + "score": 0.4865718767050507, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.45057120279075363, + "score": 0.6519330394001581, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.34777225435927045, + "score": 0.4961186750382622, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5603739447290761, + "score": 0.6420056154822653, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.0343688963868873, + "score": 0.4047209070683015, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.2491787368704391, + "score": 0.5746231903096143, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.2488577037192601, + "score": 0.20522978206415157, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4700612059850866, + "score": 0.4816367810257562, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.2607066928529267, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.10246901021115776, + "score": 0.5190821165076681, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.26380432026626405, + "score": 0.3651499702707945, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4634992426765033, + "score": 0.539793217489328, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.30519601919508343, + "score": 0.4416835863595156, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.48440897375540304, + "score": 0.623117008858419, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.3897372020625521, + "score": 0.4249436481722545, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.5520780806464591, + "score": 0.6187144317500936, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.2153742037697241, + "score": 0.3867569653562107, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.4581737688885401, + "score": 0.5709420484876131, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.22583314893598608, + "score": 0.621898873312397, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.4634872123809323, + "score": 0.7757345897028827, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.2000682107464079, + "score": 0.5494249598159933, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.42213947952783815, + "score": 0.7465246513770903, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.22347194598034506, + "score": 0.5335140114876958, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.4665650707763161, + "score": 0.7053320460577175, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.32447211622666056, + "score": 0.1659862741557369, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5077210804141314, + "score": 0.4884440880714965, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.34059658886569716, + "score": 0.40269672228447434, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5263364808620599, + "score": 0.6225404903248234, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.3695995811393786, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6148303949607244, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.31145364701208733, + "score": 0.2917184142654506, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.5087911718200273, + "score": 0.5624937546502969, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.2373642291509686, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.004236700409670164, + "score": 0.521644947712484, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.3711271620335373, + "score": 0.2712572779797431, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.5606811328336353, + "score": 0.5422335579149541, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.34655442187135127, + "score": 0.42143379809685383, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.6023567722379627, + "score": 0.5946121916131629, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.2775751476798985, + "score": 0.4141553414774169, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.5467407840471017, + "score": 0.5906263169622974, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.3372953649368346, + "score": 0.20087168885945464, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.5482505380106469, + "score": 0.38939667381078735, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.3598041249522345, + "score": 0.19552775795890473, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.5672133517600307, + "score": 0.3925483761400883, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.275788082902897, + "score": 0.24101134936111826, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.4682894376569175, + "score": 0.42220180022908466, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.25810978038865107, + "score": 0.22800980663874482, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.5114862976334219, + "score": 0.4539695239053247, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.4459565225038376, + "score": 0.2364242732935431, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.6647029994959, + "score": 0.5166025885857578, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.46443675322751826, + "score": 0.21954964295787202, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.6512173868183774, + "score": 0.48436759393641593, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.5054426458074261, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6722694706437392, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.4244950970711203, + "score": 0.33382920003857136, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.6318443095842109, + "score": 0.5343019280932326, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.3885821466849501, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.28612208859224425, + "score": 0.5985448528428169, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.20955561269928308, + "score": 0.35162367832688185, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.451252540938088, + "score": 0.5470403853789135, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.2453787991485662, + "score": 0.25066959615472983, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4099668999237371, + "score": 0.4464863544842361, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.2988083057066004, + "score": 0.24634920227044405, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4973008562914265, + "score": 0.4663964950094987, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.28528905353056333, + "score": 0.2030779777377279, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4885812318466243, + "score": 0.433265414942881, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.35797362976091973, + "score": 0.3969548673353603, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.5379266632230616, + "score": 0.6084494342072353, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.2606045000988204, + "score": 0.44834209038718303, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.4525313379099324, + "score": 0.6192927072328505, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.32155751243171055, + "score": 0.4059702785610718, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.5282954234137397, + "score": 0.5924126044868774, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.4400355050484472, + "score": 0.4540422742824559, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.5988144881332053, + "score": 0.6751320303512911, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.4187059279293422, + "score": 0.5339026027654551, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.5806611969643932, + "score": 0.7303050277242, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.45382991587984656, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6795124822993059, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.42286596174824126, + "score": 0.0, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.5934357258501683, + "score": 0.1518030911347623, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.03947087289497203, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.27694098293799824, + "score": 0.24871772534163297, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.36560991595112396, + "score": 0.0, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.537072365457506, + "score": 0.15503659808176187, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.3091536050099401, + "score": 0.0, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.504963808447426, + "score": 0.0019920318725099606, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.35120509380099896, + "score": 0.3259608048468566, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.5127991322787522, + "score": 0.546117067949716, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.2935204022158406, + "score": 0.1658740169858733, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.4867597973247361, + "score": 0.4024808935109278, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.36394690002866714, + "score": 0.28031528470622435, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.5567484827579814, + "score": 0.5080912630709646, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.278704088378991, + "score": 0.43161598042102073, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.4868935860000992, + "score": 0.575098943836209, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.28644027312006637, + "score": 0.23322806032691942, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.4643839364819269, + "score": 0.4970157115640211, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.5183632566399202, + "score": 0.32434551072073575, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.6705567848900439, + "score": 0.5461576315951293, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.39892980454447485, + "score": 0.417842986003915, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.5839375286411709, + "score": 0.6235945624226917, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.20065106893244083, + "score": 0.4684582258807146, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.41485356318318073, + "score": 0.6509870807795504, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.23119533406164058, + "score": 0.37892189586155534, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.4549622022003173, + "score": 0.5908052258359918, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.1501956901694662, + "score": 0.3354557799221337, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.3435352939078531, + "score": 0.5828616357620534, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.30692705311222085, + "score": 0.38540591321276524, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.5531231299653412, + "score": 0.6091815498132347, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.41613344165345995, + "score": 0.2913871477484173, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.5740077532098984, + "score": 0.4778519392615073, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.49132868804528823, + "score": 0.3859000637680225, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.6524450166860349, + "score": 0.5858315307170925, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.2929684584911775, + "score": 0.3738636268027588, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.5038324436049059, + "score": 0.5633496484582216, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.3414171640083141, + "score": 0.3850293035436385, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.5306256202657124, + "score": 0.5627108155348461, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.3858101625283812, + "score": 0.36775365397595855, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.5618844078335644, + "score": 0.5429838358858414, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.3116287423376191, + "score": 0.3742787779358338, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4893092447918963, + "score": 0.5507724077862277, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.4353732493964906, + "score": 0.2042633250999265, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.6078594152630662, + "score": 0.4024854380942464, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.40983351958195835, + "score": 0.21682957830342386, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.5980339788644404, + "score": 0.40328066851151617, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.27523578634783447, + "score": 0.0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.501243321339511, + "score": 0.17569336234053629, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.40840960406849836, + "score": 0.13989491400872253, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5662632887734669, + "score": 0.4095257685037439, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.047201037160775325, + "score": 0.37645276051115606, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.12237706077959995, + "score": 0.5980216031532829, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.40939284504147777, + "score": 0.22917125225310467, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5645111896180985, + "score": 0.47903700624901113, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.49428478171113605, + "score": 0.02228851669741669, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.6360862650323953, + "score": 0.16941662225476226, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.27106784138456536, + "score": 0.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.458287745564531, + "score": 0.15925566245534395, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.4034224234291925, + "score": 0.020022039661695485, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.5736798834726872, + "score": 0.18319796614102749, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.47117590712234436, + "score": 0.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.6157183058759933, + "score": 0.18337542465784618, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.4840329060094462, + "score": 0.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.6570537611908611, + "score": 0.2738250966440318, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.33677049851999397, + "score": 0.0, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.5762640586619034, + "score": 0.17457751379065342, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.5481366186143743, + "score": 0.2364341169976402, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.65502698375226, + "score": 0.44688068305416384, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.48936688255103167, + "score": 0.3275794528853699, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.6232884959088987, + "score": 0.490529412998314, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.2935294310015522, + "score": 0.3910557548848884, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.4403308077637572, + "score": 0.5640234702218941, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.49864013450084044, + "score": 0.33493420443764327, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.6473028953530363, + "score": 0.5494512089523403, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.15688445463098402, + "score": 0.18258051379187495, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.4711705838157902, + "score": 0.37144982797514564, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.6028678286611538, + "score": 0.5566300649554314, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.3109058809229358, + "score": 0.35425134311526146, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5045951829816013, + "score": 0.543224327229853, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.2822871796543221, + "score": 0.3446263661390609, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5208915029538709, + "score": 0.5457062469198075, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.1077205146963877, + "score": 0.1844277711083256, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.428338145564396, + "score": 0.39231379751700163, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.09543667505391068, + "score": 0.10142265089946709, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.40518998504409354, + "score": 0.23765231683034127, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.14087022592589463, + "score": 0.13326254700682963, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.42752370954120755, + "score": 0.37783345527529155, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.36576182289875453, + "score": 0.0836098993777203, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.5569403582137159, + "score": 0.28681946123560914, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.3304174876425892, + "score": 0.23361580096963977, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.5249852702194517, + "score": 0.4620323651475797, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.28983869034423043, + "score": 0.2464000786532921, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4975732770770436, + "score": 0.44961038359873023, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.2761854595042038, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.505027473861755, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.28060741458208943, + "score": 0.06850339366064954, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.496580338229036, + "score": 0.2947252945949938, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.07383026958055552, + "score": 0.0, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.18582113429299857, + "score": 0.20218909354463535, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.32814442346427775, + "score": 0.1512699697277094, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.5149345446415335, + "score": 0.3128685016104829, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2556346494160282, - "sentence_nr": 0 + "score": 0.15815751066481462, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4538035440310274, - "sentence_nr": 0 + "score": 0.5152611872266766, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.4102082155233312, - "sentence_nr": 0 + "score": 0.07407154448063642, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5474039587505726, - "sentence_nr": 0 + "score": 0.43145434527321425, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.22327767951697297, - "sentence_nr": 0 + "score": 0.12903696060775005, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4063556880747369, - "sentence_nr": 0 + "score": 0.456225988032654, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.26234851988380015, - "sentence_nr": 0 + "score": 0.12369892692249995, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4686295191568941, - "sentence_nr": 0 + "score": 0.44549610902403686, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.22472032138500259, - "sentence_nr": 0 + "score": 0.06647168102389285, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4363253004030211, - "sentence_nr": 0 + "score": 0.34350832619898364, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.14283509516492696, - "sentence_nr": 0 + "score": 0.12560672881768975, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.39080133039424786, - "sentence_nr": 0 + "score": 0.4969560260291519, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.2792735917259789, - "sentence_nr": 0 + "score": 0.12422788549118892, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.4652543566408097, - "sentence_nr": 0 + "score": 0.40222210564426, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3284145915841146, - "sentence_nr": 0 + "score": 0.09735981717515908, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.4777770768413136, - "sentence_nr": 0 + "score": 0.35288934658906385, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.08273178236238297, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.36399666460809255, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.32565974985390567, - "sentence_nr": 0 + "score": 0.13714845589364738, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5028101514284876, - "sentence_nr": 0 + "score": 0.45499281593451946, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.11564012893219777, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.29187222866434104, - "sentence_nr": 0 + "score": 0.44599783682350064, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.20649207653440943, - "sentence_nr": 0 + "score": 0.12601482779921785, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.45559573554011507, - "sentence_nr": 0 + "score": 0.43595665254608706, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2392120773016637, - "sentence_nr": 0 + "score": 0.3026566818840519, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.440445343487272, - "sentence_nr": 0 + "score": 0.5945859352092411, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.22424453668984448, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.41637444107955873, - "sentence_nr": 0 + "score": 0.2521233582161207, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2572733200413211, - "sentence_nr": 0 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.4520014138562526, - "sentence_nr": 0 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.23112644289004342, - "sentence_nr": 0 + "score": 0.07793031063789554, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4228120750850924, - "sentence_nr": 0 + "score": 0.3700181221537743, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.1733396766438206, - "sentence_nr": 0 + "score": 0.0867932999243575, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.37157614360073693, - "sentence_nr": 0 + "score": 0.4201964133235075, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3070946890889356, - "sentence_nr": 0 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5150001444865586, - "sentence_nr": 0 + "score": 0.3969463877642616, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.33726552749982586, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5166923315613857, - "sentence_nr": 0 + "score": 0.41649654108052436, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.34077616827498786, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5278331664063162, - "sentence_nr": 0 + "score": 0.3630576975795868, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0744904632040495, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4111163205685468, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.31260047665100127, - "sentence_nr": 0 + "score": 0.08767210132815903, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.48667320069984316, - "sentence_nr": 0 + "score": 0.40476518002703893, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.05624172669013078, - "sentence_nr": 0 + "score": 0.08616711094288851, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3317185957958262, - "sentence_nr": 0 + "score": 0.3696512763473903, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2624355454690498, - "sentence_nr": 0 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.48887901649623144, - "sentence_nr": 0 + "score": 0.4486368934849452, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.40487199173556226, - "sentence_nr": 0 + "score": 0.14738500064905094, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5637204315528265, - "sentence_nr": 0 + "score": 0.4659728395318289, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.40165053057541866, - "sentence_nr": 0 + "score": 0.15386029327005746, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5837756195280097, - "sentence_nr": 0 + "score": 0.43911482594829104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.40311197004738203, - "sentence_nr": 0 + "score": 0.10070927557742705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5788525108956781, - "sentence_nr": 0 + "score": 0.43718220262892105, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.30912713581280643, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5376618148723133, - "sentence_nr": 0 + "score": 0.3370100422576744, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.22800071662764984, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4763467106828393, - "sentence_nr": 0 + "score": 0.1946966569103724, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.2997057270104923, - "sentence_nr": 0 + "score": 0.0772718393063023, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5028660357670663, - "sentence_nr": 0 + "score": 0.4203683137304257, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.3874069559631556, - "sentence_nr": 0 + "score": 0.09084091756463074, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5570550964093942, - "sentence_nr": 0 + "score": 0.4286741659142759, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.45074681913051867, - "sentence_nr": 0 + "score": 0.06126604215610123, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5985843659278748, - "sentence_nr": 0 + "score": 0.3837677428398438, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.2697752741359869, - "sentence_nr": 0 + "score": 0.0756907193511249, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.46086885667795485, - "sentence_nr": 0 + "score": 0.4138725093679467, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3442651325185116, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.534774838547693, - "sentence_nr": 0 + "score": 0.16764957347186446, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.1195053737774238, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.13496104417905996, - "sentence_nr": 0 + "score": 0.4512136289975786, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.36516261117337495, - "sentence_nr": 0 + "score": 0.21748353646757182, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5425947356911068, - "sentence_nr": 0 + "score": 0.4462746462826943, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.5108628809804742, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6929396211173784, - "sentence_nr": 0 + "score": 0.4331286519146886, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.49872195941208947, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6907054265096231, - "sentence_nr": 0 + "score": 0.3538966478758119, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.45313578977486535, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6160993561903745, - "sentence_nr": 0 + "score": 0.4179644538349004, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.37640646218183, - "sentence_nr": 0 + "score": 0.11378204941109882, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5429063669356702, - "sentence_nr": 0 + "score": 0.4981472095171313, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.4331983607416391, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.601662300924314, - "sentence_nr": 0 + "score": 0.43759229210123524, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.4485994475252126, - "sentence_nr": 0 + "score": 0.10505106462290037, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6258984728025891, - "sentence_nr": 0 + "score": 0.4474870048911137, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5793415656031259, - "sentence_nr": 0 + "score": 0.1059352062327485, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7153451394318217, - "sentence_nr": 0 + "score": 0.4291550754056065, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5248291448433852, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6920934053021797, - "sentence_nr": 0 + "score": 0.4239838444198129, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.33244322003055665, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5420222318771111, - "sentence_nr": 0 + "score": 0.0009218289085545725, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.425742897803471, - "sentence_nr": 0 + "score": 0.15720527174368754, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6292455373063424, - "sentence_nr": 0 + "score": 0.47882285385622714, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0072148746031117554, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.09496152255049971, - "sentence_nr": 0 + "score": 0.3374178992279451, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4803498024083505, - "sentence_nr": 0 + "score": 0.15653859793617866, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5963395991179793, - "sentence_nr": 0 + "score": 0.43177798053127925, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.3335262554878992, - "sentence_nr": 0 + "score": 0.0982484177591637, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5258955094447381, - "sentence_nr": 0 + "score": 0.4109236039282987, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.3411271681324882, - "sentence_nr": 0 + "score": 0.10106439835419144, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5323123267352375, - "sentence_nr": 0 + "score": 0.44450926478634867, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2651736858432996, - "sentence_nr": 0 + "score": 0.0891537192318598, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4491383344282561, - "sentence_nr": 0 + "score": 0.3970634926176537, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3082082660061424, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5077609645706764, - "sentence_nr": 0 + "score": 0.16496711525651045, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.32151064813737534, - "sentence_nr": 0 + "score": 0.052359103292999656, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5058205933378546, - "sentence_nr": 0 + "score": 0.3805982553288677, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.30384210838236353, - "sentence_nr": 0 + "score": 0.0950136506275681, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.49573162353290035, - "sentence_nr": 0 + "score": 0.4372017487229785, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.4140944157226165, - "sentence_nr": 0 + "score": 0.12506460115047335, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.6110635706148037, - "sentence_nr": 0 + "score": 0.46140175133635725, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3007845437586152, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4720170373660879, - "sentence_nr": 0 + "score": 0.4404222773455128, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.1259356760989446, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.44568274520971096, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.35069370820755275, - "sentence_nr": 0 + "score": 0.10127171102984855, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5735580981959628, - "sentence_nr": 0 + "score": 0.4525620764847558, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.10833971870416897, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.0014196479273140264, - "sentence_nr": 0 + "score": 0.4467303749319595, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.27579736884967815, - "sentence_nr": 0 + "score": 0.16322494183480127, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.44560520221877703, - "sentence_nr": 0 + "score": 0.4815584993817062, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.37742688647401873, - "sentence_nr": 0 + "score": 0.14163299203710986, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5674314405993244, - "sentence_nr": 0 + "score": 0.3958314877752854, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.24828430598240606, - "sentence_nr": 0 + "score": 0.09463828889338871, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5078550622606068, - "sentence_nr": 0 + "score": 0.3398200805270262, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.34545319957597864, - "sentence_nr": 0 + "score": 0.0904087252785689, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5727052860304503, - "sentence_nr": 0 + "score": 0.41830513174690515, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4256604038587669, - "sentence_nr": 0 + "score": 0.1200100437012302, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.595254482532169, - "sentence_nr": 0 + "score": 0.4636227306109079, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.35948829980203323, - "sentence_nr": 0 + "score": 0.17208141302168437, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5639910704472698, - "sentence_nr": 0 + "score": 0.4542522451167506, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.3625286446151028, - "sentence_nr": 0 + "score": 0.21351902664706998, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6148737881972042, - "sentence_nr": 0 + "score": 0.5130443042033361, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4821945698340569, - "sentence_nr": 0 + "score": 0.06289570792563275, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6555631364280885, - "sentence_nr": 0 + "score": 0.3813881170279124, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4519603667438429, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6009987666643928, - "sentence_nr": 0 + "score": 0.38763756150559275, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.16269986423611488, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.5542271267412462, - "sentence_nr": 0 + "score": 0.19045679700622437, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7275049499508799, - "sentence_nr": 0 + "score": 0.4124342444810736, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.327910616954487, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5832612672351287, - "sentence_nr": 0 + "score": 0.2398247112527542, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.5350666712285949, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6501051146578934, - "sentence_nr": 0 + "score": 0.06939838145153245, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.339818403012025, - "sentence_nr": 0 + "score": 0.05275923024775565, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5156759219303986, - "sentence_nr": 0 + "score": 0.3724723203846839, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.4381699512774638, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5887410281104106, - "sentence_nr": 0 + "score": 0.3544628606759813, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.38968867962607934, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5581403039390647, - "sentence_nr": 0 + "score": 0.3371547585108182, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.44378279372807367, - "sentence_nr": 0 + "score": 0.12650809806003369, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5825113284353328, - "sentence_nr": 0 + "score": 0.4579202271851988, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.34093060419986554, - "sentence_nr": 0 + "score": 0.09676230489828269, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5050414552947896, - "sentence_nr": 0 + "score": 0.43266369498706486, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.32155220285195785, - "sentence_nr": 0 + "score": 0.1691386174483793, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5502401579986564, - "sentence_nr": 0 + "score": 0.4920789340026317, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.48871893597517396, - "sentence_nr": 0 + "score": 0.19135523280427486, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.6448535407289147, - "sentence_nr": 0 + "score": 0.49947805136320467, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.4647350187265495, - "sentence_nr": 0 + "score": 0.2356661678654945, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.6141693179612359, - "sentence_nr": 0 + "score": 0.5124350706386419, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2572787263311883, - "sentence_nr": 0 + "score": 0.14944432524273302, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4597049841535362, - "sentence_nr": 0 + "score": 0.4972796478830659, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.38621174932063007, - "sentence_nr": 0 + "score": 0.10588612806056373, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5629586598269498, - "sentence_nr": 0 + "score": 0.4068718481729766, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.10720391954020723, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.37219605281253065, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.431633629801714, - "sentence_nr": 0 + "score": 0.09793316925795417, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5611890334237722, - "sentence_nr": 0 + "score": 0.4297577431879659, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.12843096555088776, - "sentence_nr": 0 + "score": 0.0967458811247473, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3356201430079791, - "sentence_nr": 0 + "score": 0.4485783191522753, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.17727473966694943, - "sentence_nr": 0 + "score": 0.06851723496815999, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3475071694578125, - "sentence_nr": 0 + "score": 0.40911149660575097, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.20451416608402828, - "sentence_nr": 0 + "score": 0.14184998906630783, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.38185285396290036, - "sentence_nr": 0 + "score": 0.44498159653494584, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.24678030799496634, - "sentence_nr": 0 + "score": 0.13308739447486365, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.4463603005685723, - "sentence_nr": 0 + "score": 0.3932447622969156, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.17736142488062245, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.3654025502565916, - "sentence_nr": 0 + "score": 0.36741937011390374, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.14222939605129875, - "sentence_nr": 0 + "score": 0.11220450894323894, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.32212719342865237, - "sentence_nr": 0 + "score": 0.46129962837218175, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2562205755075293, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.46066225689165846, - "sentence_nr": 0 + "score": 0.3645369664653625, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.24809323900653618, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.455254011012136, - "sentence_nr": 0 + "score": 0.3275292968031138, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.08932983819566953, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.37462132890676997, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.22798424876104878, - "sentence_nr": 0 + "score": 0.15453746478246141, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4315883077530936, - "sentence_nr": 0 + "score": 0.4413516563123831, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.1383193561213217, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0013506212857914643, - "sentence_nr": 0 + "score": 0.4229717720106369, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.22750547588410633, - "sentence_nr": 0 + "score": 0.14846392828893068, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4160796302144522, - "sentence_nr": 0 + "score": 0.44939103256256696, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3536676112393946, - "sentence_nr": 0 + "score": 0.10713148568717314, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5252283198216768, - "sentence_nr": 0 + "score": 0.41522111700393083, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5494025263062274, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6860161543947312, - "sentence_nr": 0 + "score": 0.3539070801331386, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2636405082687104, - "sentence_nr": 0 + "score": 0.11340129142744679, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5072009470421238, - "sentence_nr": 0 + "score": 0.4168800407013454, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3994138413590059, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6335699196668345, - "sentence_nr": 0 + "score": 0.3702987017023586, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.32440820201863096, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5816024759666973, - "sentence_nr": 0 + "score": 0.3241317524160092, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.30860365223174097, - "sentence_nr": 0 + "score": 0.07351652222518425, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5323991480984563, - "sentence_nr": 0 + "score": 0.3862617013651048, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.5275588446482796, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6824121095569455, - "sentence_nr": 0 + "score": 0.30718853768673293, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.4698824517223119, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6509974368827985, - "sentence_nr": 0 + "score": 0.3499024158832446, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.12587301409115934, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.43278573034203477, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.465943811426769, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.646929348297808, - "sentence_nr": 0 + "score": 0.4312123024580457, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.001973164956590371, - "sentence_nr": 0 + "score": 0.24946780875926136, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.39626726411474644, - "sentence_nr": 0 + "score": 0.06500924965575555, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5928013371853409, - "sentence_nr": 0 + "score": 0.389301118498321, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.2348553453946444, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4891959123914518, - "sentence_nr": 0 + "score": 0.29873361351172023, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.4222656487192343, - "sentence_nr": 0 + "score": 0.08186981924084771, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.6421614792137705, - "sentence_nr": 0 + "score": 0.36422083962860535, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.20174173621464261, - "sentence_nr": 0 + "score": 0.07752927781917028, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.5179166118048267, - "sentence_nr": 0 + "score": 0.3238609427019678, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.5116634146141776, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.6950231685488834, - "sentence_nr": 0 + "score": 0.3092395616495983, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.27720246067551324, - "sentence_nr": 0 + "score": 0.0811151580341062, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.543353961410956, - "sentence_nr": 0 + "score": 0.4179228886149028, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.1658317981046275, - "sentence_nr": 0 + "score": 0.04318453178079916, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.4396479745504188, - "sentence_nr": 0 + "score": 0.3381884955798567, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.5076222240986388, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.6899856343139605, - "sentence_nr": 0 + "score": 0.15333726274185422, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.42072143291659103, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.6265956117333142, - "sentence_nr": 0 + "score": 0.14176967102285878, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.00591195237335994, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.15760341237876357, - "sentence_nr": 0 + "score": 0.15856726741880453, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.5079711118438801, - "sentence_nr": 0 + "score": 0.06061016244701235, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.6900890595896133, - "sentence_nr": 0 + "score": 0.3480533968220821, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.07446712399912313, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.30485244492635144, - "sentence_nr": 0 + "score": 0.3107132702855867, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.3394216003840941, - "sentence_nr": 0 + "score": 0.06622410994100032, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5399361239256494, - "sentence_nr": 0 + "score": 0.42506963891617355, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.3461114139111442, - "sentence_nr": 0 + "score": 0.08459573412751416, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.5537111972654953, - "sentence_nr": 0 + "score": 0.4172605432414846, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.353179331599201, - "sentence_nr": 0 + "score": 0.08986406706995408, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.5558881348090785, - "sentence_nr": 0 + "score": 0.44470674434718094, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.33522833358360765, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.534195929930943, - "sentence_nr": 0 + "score": 0.4109749814872678, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.3279338213872338, - "sentence_nr": 0 + "score": 0.07565762629954577, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.5206722319482356, - "sentence_nr": 0 + "score": 0.3606232238015037, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.1900249500296748, - "sentence_nr": 0 + "score": 0.12594843055469976, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.4570054063295732, - "sentence_nr": 0 + "score": 0.4353555563309006, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.389868366744335, - "sentence_nr": 0 + "score": 0.07142908588092715, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.6181891240638018, - "sentence_nr": 0 + "score": 0.3642310370662869, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.30595231029570097, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.5076226900210099, - "sentence_nr": 0 + "score": 0.3186898662502609, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.37858398735109683, - "sentence_nr": 0 + "score": 0.06656213940646744, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.5535757116038853, - "sentence_nr": 0 + "score": 0.3842510919126927, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.06946125044973972, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.37972229376763555, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.3201978307646018, - "sentence_nr": 0 + "score": 0.15996142821020284, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.5281184078781382, - "sentence_nr": 0 + "score": 0.42995669154818883, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.1627842130495941, - "sentence_nr": 0 + "score": 0.06922310590511903, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3513085830979839, - "sentence_nr": 0 + "score": 0.39694083278594716, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.30576442771176066, - "sentence_nr": 0 + "score": 0.1339039164909805, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.5175418593642837, - "sentence_nr": 0 + "score": 0.44979655276903346, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.3734491516745214, - "sentence_nr": 0 + "score": 0.16234678312329395, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.5499493819792871, - "sentence_nr": 0 + "score": 0.4114313966468408, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.4397415106513502, - "sentence_nr": 0 + "score": 0.14108777831558816, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.5907735810868658, - "sentence_nr": 0 + "score": 0.38610201135781486, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.31308824228412185, - "sentence_nr": 0 + "score": 0.07152747748412269, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.4950165423717857, - "sentence_nr": 0 + "score": 0.38927458491364797, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.28646584019908145, - "sentence_nr": 0 + "score": 0.0889604331153271, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.4746124656486252, - "sentence_nr": 0 + "score": 0.4174106361046784, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.21240535233702176, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.3985917300395283, - "sentence_nr": 0 + "score": 0.35972456016417403, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.0857750978817917, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.18416226663843008, - "sentence_nr": 0 + "score": 0.44136113805162547, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.409963740738593, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.5597983233407279, - "sentence_nr": 0 + "score": 0.29764050036303846, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.030041173262958625, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.16461275738712375, - "sentence_nr": 0 + "score": 0.2883740704360469, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2604470328007762, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.28710736118585223, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.47019373110040275, - "sentence_nr": 0 + "score": 0.2750949112536697, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.02574148824389794, - "sentence_nr": 0 + "score": 0.369396410785335, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.2111955699760469, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.428831679677381, - "sentence_nr": 0 + "score": 0.30886876402238045, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.3612717557348476, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.5558371668340614, - "sentence_nr": 0 + "score": 0.3445489778722215, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.46890796443667687, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.6445795993451092, - "sentence_nr": 0 + "score": 0.22034235744543199, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.30704694388456133, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.5217468869740803, - "sentence_nr": 0 + "score": 0.3992343412084987, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.3675770737978913, - "sentence_nr": 0 + "score": 0.12454093367377822, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5833872029429698, - "sentence_nr": 0 + "score": 0.45400457519342263, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.2381080412543041, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.4678770958208047, - "sentence_nr": 0 + "score": 0.29211251612445716, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.12508074021419405, - "sentence_nr": 0 + "score": 0.08742637130044478, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.39088781423976093, - "sentence_nr": 0 + "score": 0.3782754387193616, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.4481437122587742, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.6475959138561268, - "sentence_nr": 0 + "score": 0.3843618124722185, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.3699375619378516, - "sentence_nr": 0 + "score": 0.10567309578898446, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5932397042974766, - "sentence_nr": 0 + "score": 0.4024349171516437, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.32603788130544104, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.4346391355101555, - "sentence_nr": 0 + "score": 0.13894512516215204, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.648783727613815, - "sentence_nr": 0 + "score": 0.483078120317575, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.24647819790998704, - "sentence_nr": 0 + "score": 0.0832724096908118, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.4673628408395248, - "sentence_nr": 0 + "score": 0.4097982251907115, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.35045670972629744, - "sentence_nr": 0 + "score": 0.09843551021314972, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.564796827816794, - "sentence_nr": 0 + "score": 0.44345815368179514, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.41661325369918395, - "sentence_nr": 0 + "score": 0.15357179047039304, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.587715140145979, - "sentence_nr": 0 + "score": 0.4395965605263733, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.3937759330018993, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.5961746226614889, - "sentence_nr": 0 + "score": 0.3352216651363677, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.4088174428659509, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.5924365007019256, - "sentence_nr": 0 + "score": 0.2926736955448575, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.3565944577029545, - "sentence_nr": 0 + "score": 0.058474735537506775, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5419443217291802, - "sentence_nr": 0 + "score": 0.346711996349685, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.5003903156428934, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.6591036004593714, - "sentence_nr": 0 + "score": 0.32876484301179987, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.4075013846828396, - "sentence_nr": 0 + "score": 0.14849717699290216, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.609574810318951, - "sentence_nr": 0 + "score": 0.4416362145529488, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.48070461563699834, - "sentence_nr": 0 + "score": 0.0862684017016977, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.6555925126677848, - "sentence_nr": 0 + "score": 0.46311606179286086, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.46685213488332356, - "sentence_nr": 0 + "score": 0.08054744999594665, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.6452685695102438, - "sentence_nr": 0 + "score": 0.3910533825433727, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.17827499805988958, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.4226865520698467, - "sentence_nr": 0 + "score": 0.42292922955918455, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.509712898465703, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.6788015977861386, - "sentence_nr": 0 + "score": 0.14370950122782516, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.0701361033487376, - "sentence_nr": 0 + "score": 0.3334514572265135, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.4246487921338825, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.6150058842025391, - "sentence_nr": 0 + "score": 0.21268091254698024, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.16285971091078436, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.43885470392891923, - "sentence_nr": 0 + "score": 0.12067839739874531, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.37339369029886144, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.5432112723704581, - "sentence_nr": 0 + "score": 0.3233637515119462, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.23751632756038837, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.4414396968637268, - "sentence_nr": 0 + "score": 0.31176467991525436, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.23132615410621146, - "sentence_nr": 0 + "score": 0.08160236983918483, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.43514359502154976, - "sentence_nr": 0 + "score": 0.38567653709947824, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.1741525384512767, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.45255374179790936, - "sentence_nr": 0 + "score": 0.3636314071779547, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.11702528754872281, - "sentence_nr": 0 + "score": 0.07352808725672978, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.35961444717857005, - "sentence_nr": 0 + "score": 0.39849401484916575, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.38197969936092163, - "sentence_nr": 0 + "score": 0.0948243550653547, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.5508810570148596, - "sentence_nr": 0 + "score": 0.40688622415675096, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.2697061457592779, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.45539079735897503, - "sentence_nr": 0 + "score": 0.37941443386230733, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0754791629755296, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4115037991203147, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.28350073467974646, - "sentence_nr": 0 + "score": 0.17376142320673926, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4913615840203272, - "sentence_nr": 0 + "score": 0.4494840281694199, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.09628144140511948, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.0012419274714356686, - "sentence_nr": 0 + "score": 0.3988415038006601, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.2274056095104063, - "sentence_nr": 0 + "score": 0.08810203169380636, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.45103469245105887, - "sentence_nr": 0 + "score": 0.4085631076024389, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.21791041776703116, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.44004369960566136, - "sentence_nr": 0 + "score": 0.3490597215692333, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.46773190351581395, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.6215065422343401, - "sentence_nr": 0 + "score": 0.363944181125048, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.2799135631577256, - "sentence_nr": 0 + "score": 0.3406214634850959, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.10136628610815898, - "sentence_nr": 0 + "score": 0.15565663466238167, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.30045915824023645, - "sentence_nr": 0 + "score": 0.45102089786807525, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.1129192185025187, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.3036955697945895, - "sentence_nr": 0 + "score": 0.3349337342755207, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.1146190984378276, - "sentence_nr": 0 + "score": 0.19306612958933164, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.2705636478589466, - "sentence_nr": 0 + "score": 0.4566094829965023, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.27735384192405904, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.5117239199116311, - "sentence_nr": 0 + "score": 0.15165087037620367, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.2773615322238364, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.4616718172834648, - "sentence_nr": 0 + "score": 0.10988031996776393, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.14384707977041108, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.27918670410574553, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.4946319454441461, - "sentence_nr": 0 + "score": 0.17231483245958562, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.001482799525504152, - "sentence_nr": 0 + "score": 0.26083297460286664, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.11313747467095658, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.31131411877123494, - "sentence_nr": 0 + "score": 0.2205187870837211, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.0937099995586274, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.2035993189596312, - "sentence_nr": 0 + "score": 0.38638693017010634, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.13511029141047634, - "sentence_nr": 0 + "score": 0.06087893264282183, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.36856155052346085, - "sentence_nr": 0 + "score": 0.30404764547641244, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.17779867452221493, - "sentence_nr": 0 + "score": 0.41200704988717746, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.109333912337143, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.1806947117206154, - "sentence_nr": 0 + "score": 0.43946533504329827, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.17199314222249618, - "sentence_nr": 0 + "score": 0.3875031655866923, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.14527200081334513, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.18992468117577804, - "sentence_nr": 0 + "score": 0.4245172781893951, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.2533256789438013, - "sentence_nr": 0 + "score": 0.12099786399361606, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.46990234871498904, - "sentence_nr": 0 + "score": 0.4447762461237164, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.1218148752860121, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.345672681124001, - "sentence_nr": 0 + "score": 0.35906265614758676, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.10096459770150681, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.17608422487251038, - "sentence_nr": 0 + "score": 0.4189740217714419, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.15711076787374778, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.17408200912027585, - "sentence_nr": 0 + "score": 0.4926610996660017, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.12253628106911543, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.2005845691509901, - "sentence_nr": 0 + "score": 0.41098604819939544, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.054746206230409135, - "sentence_nr": 0 + "score": 0.08517707813747888, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.20233084789265965, - "sentence_nr": 0 + "score": 0.4298965032520897, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.20075037608245913, - "sentence_nr": 0 + "score": 0.13594665641498668, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4313422346882818, - "sentence_nr": 0 + "score": 0.47621282367548656, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.1874333361540541, - "sentence_nr": 0 + "score": 0.12066287439499573, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.41793053821849296, - "sentence_nr": 0 + "score": 0.40225318320388664, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.3716703379730988, - "sentence_nr": 0 + "score": 0.16231893029395061, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.5800216707448408, - "sentence_nr": 0 + "score": 0.4858308027555531, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.1617336445898746, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.41103371741191813, - "sentence_nr": 0 + "score": 0.3781094023262652, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.2939876705913701, - "sentence_nr": 0 + "score": 0.052244516140907096, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.513822906290756, - "sentence_nr": 0 + "score": 0.351436961102141, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.34882223508522014, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.5382666998696708, - "sentence_nr": 0 + "score": 0.26865126568707876, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.16815129512086885, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.42564250588688346, - "sentence_nr": 0 + "score": 0.17419420900027405, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.3337393033483562, - "sentence_nr": 0 + "score": 0.07291105107725455, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5341770443205455, - "sentence_nr": 0 + "score": 0.4474906527730671, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.3334625511884235, - "sentence_nr": 0 + "score": 0.08334085822278188, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5084204520879435, - "sentence_nr": 0 + "score": 0.40282723830388284, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.43467677589817527, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.6164945332495145, - "sentence_nr": 0 + "score": 0.33037125702748205, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.19883793649788686, - "sentence_nr": 0 + "score": 0.08714838249931423, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.4425054083753717, - "sentence_nr": 0 + "score": 0.3692825189624461, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.24129507267532274, - "sentence_nr": 0 + "score": 0.09127577115927074, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.4979085300830714, - "sentence_nr": 0 + "score": 0.3300669374207929, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.15080316480304565, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.4424628792965376, - "sentence_nr": 0 + "score": 0.3231709973877731, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.2777349520199055, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.5312509449503231, - "sentence_nr": 0 + "score": 0.1680802224904863, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.20390263030337064, - "sentence_nr": 0 + "score": 0.08434660455803612, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.4345747929502553, - "sentence_nr": 0 + "score": 0.32335639685468925, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.25059044332210606, - "sentence_nr": 0 + "score": 0.909878624371155, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5062568722858068, - "sentence_nr": 0 + "score": 0.9494599978334789, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.058621811091044064, - "sentence_nr": 0 + "score": 0.6484538568755306, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.26495616287092694, - "sentence_nr": 0 + "score": 0.8387015535622947, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.2263798936383913, - "sentence_nr": 0 + "score": 0.9625248317849852, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5010204752876567, - "sentence_nr": 0 + "score": 0.9799603794887166, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.37144367036148984, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.5493384036554019, - "sentence_nr": 0 + "score": 0.17236959754271308, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.3165559728474609, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.5621775968535822, - "sentence_nr": 0 + "score": 0.17066745219661572, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.16207910248424867, - "sentence_nr": 0 + "score": 0.17964021028231922, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.2656097416710467, - "sentence_nr": 0 + "score": 0.05270938682743268, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.49446617915326735, - "sentence_nr": 0 + "score": 0.31252925174187013, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.1075481111616894, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.003921568627450981, - "sentence_nr": 0 + "score": 0.3738814601144911, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.30060279696865555, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5034923934195839, - "sentence_nr": 0 + "score": 0.15334066204940114, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.17913113678266074, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.39874837064673946, - "sentence_nr": 0 + "score": 0.1179671428128192, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.39725392028587103, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.5469018582137435, - "sentence_nr": 0 + "score": 0.35902184995166087, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.3532549308527307, - "sentence_nr": 0 + "score": 0.29668758510830123, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.08124065769691519, - "sentence_nr": 0 + "score": 0.0529715946034933, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.36179606551800264, - "sentence_nr": 0 + "score": 0.35557346479092056, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.14976259597799593, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.3842337676785057, - "sentence_nr": 0 + "score": 0.3447636250916266, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.18524922432663024, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4677488632814114, - "sentence_nr": 0 + "score": 0.36010213387059153, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.2117138550702324, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.4604849061460804, - "sentence_nr": 0 + "score": 0.1569159469136538, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.2566428979550943, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.472012087459169, - "sentence_nr": 0 + "score": 0.14702176025137792, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.15207203397909086, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.29072641495011164, - "sentence_nr": 0 + "score": 0.38870674200492367, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5355182083154902, - "sentence_nr": 0 + "score": 0.6484380084879691, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.05513343823020891, - "sentence_nr": 0 + "score": 0.3961285597009415, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.24973538251018115, - "sentence_nr": 0 + "score": 0.6148751441350505, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2410257388716231, - "sentence_nr": 0 + "score": 0.4923751299732868, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.47108004929437347, - "sentence_nr": 0 + "score": 0.6853756490381199, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3625318570509803, - "sentence_nr": 0 + "score": 0.11133996756497437, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5303179877188419, - "sentence_nr": 0 + "score": 0.4410280353998367, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.35328951154883514, - "sentence_nr": 0 + "score": 0.17374951565433233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5286973900062114, - "sentence_nr": 0 + "score": 0.45325597884524305, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3816118513389601, - "sentence_nr": 0 + "score": 0.17743299460161885, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5423632561644341, - "sentence_nr": 0 + "score": 0.43071271897416463, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3825612041950578, - "sentence_nr": 0 + "score": 0.26459538953931094, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5448874224723139, - "sentence_nr": 0 + "score": 0.5272178908335121, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.2914614724265088, - "sentence_nr": 0 + "score": 0.26801022984888695, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.49272317726959486, - "sentence_nr": 0 + "score": 0.5654883864995515, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.31701648962901274, - "sentence_nr": 0 + "score": 0.21665407194210906, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5369836185626417, - "sentence_nr": 0 + "score": 0.4344921442639243, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.4536812813438368, - "sentence_nr": 0 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6033688389068195, - "sentence_nr": 0 + "score": 0.6037023613177924, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.4388695885558457, - "sentence_nr": 0 + "score": 0.3574583793293068, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5988319380396017, - "sentence_nr": 0 + "score": 0.5924115119819969, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.37994652561206577, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.6464467277069994, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3891908674355695, - "sentence_nr": 0 + "score": 0.2158914621804855, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5638438488395793, - "sentence_nr": 0 + "score": 0.5448184155666022, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0562284009388899, - "sentence_nr": 0 + "score": 0.22292726306270316, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.2499832582979363, - "sentence_nr": 0 + "score": 0.5653789747970112, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.39803983519369723, - "sentence_nr": 0 + "score": 0.09362261118571368, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5925122761777685, - "sentence_nr": 0 + "score": 0.3452056942265759, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4427412215990632, - "sentence_nr": 0 + "score": 0.18031307339768174, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6222208791908107, - "sentence_nr": 0 + "score": 0.522164454804456, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3709917965006414, - "sentence_nr": 0 + "score": 0.21403222128228389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6015654773262525, - "sentence_nr": 0 + "score": 0.563121432204311, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3899394268518547, - "sentence_nr": 0 + "score": 0.18917620656425485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.594841687625348, - "sentence_nr": 0 + "score": 0.4346170232980484, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.37937217700502807, - "sentence_nr": 0 + "score": 0.2999092588227898, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.585570521448987, - "sentence_nr": 0 + "score": 0.5505916495384416, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2935817756923911, - "sentence_nr": 0 + "score": 0.4054983797456263, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5258167932749879, - "sentence_nr": 0 + "score": 0.6264774230839022, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3069539363570848, - "sentence_nr": 0 + "score": 0.420450507904553, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.54212160924242, - "sentence_nr": 0 + "score": 0.6503146347305717, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3837706446662323, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6114841751733563, - "sentence_nr": 0 + "score": 0.4263684749347053, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.47036083421186914, - "sentence_nr": 0 + "score": 0.20051119758906127, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6284217372117649, - "sentence_nr": 0 + "score": 0.5334791309401924, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.24894072982768842, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5212235893093335, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.43771450361962905, - "sentence_nr": 0 + "score": 0.2562849004088193, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6241615593947294, - "sentence_nr": 0 + "score": 0.5767019342009202, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3535002370419364, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.002470966147763776, - "sentence_nr": 0 + "score": 0.5959879218348465, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.41876701425580165, - "sentence_nr": 0 + "score": 0.393613605227227, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6023072470951277, - "sentence_nr": 0 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.23150269995638142, - "sentence_nr": 0 + "score": 0.2465888500427759, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.46961020207805865, - "sentence_nr": 0 + "score": 0.5221084445696768, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.23712278533862596, - "sentence_nr": 0 + "score": 0.35983766090218355, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5116265380743877, - "sentence_nr": 0 + "score": 0.5862251404739759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.16820174403705807, - "sentence_nr": 0 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4159468803310715, - "sentence_nr": 0 + "score": 0.41020178654369294, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.14411824146646438, - "sentence_nr": 0 + "score": 0.22150370805587954, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3888201933329776, - "sentence_nr": 0 + "score": 0.5463488388082953, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.20772664892358625, - "sentence_nr": 0 + "score": 0.2971752224486841, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4530317770012902, - "sentence_nr": 0 + "score": 0.605133664481872, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.08742348900087889, - "sentence_nr": 0 + "score": 0.2329856851831642, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3483375322546142, - "sentence_nr": 0 + "score": 0.5405751250637106, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.22620136486770118, - "sentence_nr": 0 + "score": 0.39461811323775403, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.48605914376103504, - "sentence_nr": 0 + "score": 0.5655204109921267, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.17808024265318068, - "sentence_nr": 0 + "score": 0.2786312783602775, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4325975219749186, - "sentence_nr": 0 + "score": 0.4836796407825139, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.41756686236967944, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5616829345739638, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.21950096276063155, - "sentence_nr": 0 + "score": 0.25564177137418986, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4615148727332789, - "sentence_nr": 0 + "score": 0.49870011615602194, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.39579112101105834, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.0014697236919459144, - "sentence_nr": 0 + "score": 0.6431490866428237, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.21002599862757135, - "sentence_nr": 0 + "score": 0.38189567401226293, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4392687670949058, - "sentence_nr": 0 + "score": 0.6154314825900052, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.30495379106243414, - "sentence_nr": 0 + "score": 0.15985840708020788, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.503838460756843, - "sentence_nr": 0 + "score": 0.44951053332729884, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.5238129782835811, - "sentence_nr": 0 + "score": 0.35253338922743144, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6833665118503387, - "sentence_nr": 0 + "score": 0.6487975154557831, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.3682745409081855, - "sentence_nr": 0 + "score": 0.2126707920684064, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5329336102063273, - "sentence_nr": 0 + "score": 0.4659908460634765, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.32747218107784076, - "sentence_nr": 0 + "score": 0.27217589854489177, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.54609482853432, - "sentence_nr": 0 + "score": 0.5756343666825848, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.28824791607535494, - "sentence_nr": 0 + "score": 0.24513414885202045, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5206551995745454, - "sentence_nr": 0 + "score": 0.5476647609559218, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.324365548882549, - "sentence_nr": 0 + "score": 0.23240102389974368, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5907155236217757, - "sentence_nr": 0 + "score": 0.4973274282641141, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.5679645191995755, - "sentence_nr": 0 + "score": 0.28467215304840787, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.7028524535352202, - "sentence_nr": 0 + "score": 0.4298052820106505, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4619461496096305, - "sentence_nr": 0 + "score": 0.28653528640783255, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6211197769248664, - "sentence_nr": 0 + "score": 0.5053636612097852, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1773290356139862, - "sentence_nr": 0 + "score": 0.17979384730979156, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.45665875592491983, - "sentence_nr": 0 + "score": 0.4177311931467539, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.48063509995871484, - "sentence_nr": 0 + "score": 0.15426765225005337, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6455694687360541, - "sentence_nr": 0 + "score": 0.45289625960131974, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.31326946419057006, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.001876876876876877, - "sentence_nr": 0 + "score": 0.591171976889058, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4024307797146222, - "sentence_nr": 0 + "score": 0.1702602472176709, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5799489971425524, - "sentence_nr": 0 + "score": 0.4366640707779677, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.09425983742608171, - "sentence_nr": 0 + "score": 0.7096224667917136, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.32871133484905984, - "sentence_nr": 0 + "score": 0.8862932371217843, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.1500767455847696, - "sentence_nr": 0 + "score": 0.5294442646627652, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.35247049201056063, - "sentence_nr": 0 + "score": 0.7281375072835307, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.933651069586263, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.25911153048152963, - "sentence_nr": 0 + "score": 0.9586507529693243, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.12339725436856788, - "sentence_nr": 0 + "score": 0.33713757310040376, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.30862388504827054, - "sentence_nr": 0 + "score": 0.5731908178757754, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.03683469030327237, - "sentence_nr": 0 + "score": 0.4162915990459618, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.1883651540330025, - "sentence_nr": 0 + "score": 0.5970097205621886, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3816408219023713, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.2475555473086587, - "sentence_nr": 0 + "score": 0.5784105768028126, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.18059014320852598, - "sentence_nr": 0 + "score": 0.19129143021561437, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4126878831445088, - "sentence_nr": 0 + "score": 0.390473445537339, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.08952221293821708, - "sentence_nr": 0 + "score": 0.25848476545940924, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3302185725289447, - "sentence_nr": 0 + "score": 0.4897308313348651, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.18398226639192106, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.37285010531146734, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.11089707106904065, - "sentence_nr": 0 + "score": 0.27057949011516347, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.32257370439479693, - "sentence_nr": 0 + "score": 0.5644281635271426, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.29851690541541476, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.1542381010660205, - "sentence_nr": 0 + "score": 0.6224209860013706, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.10041397006304215, - "sentence_nr": 0 + "score": 0.26958884543190903, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.267235994708103, - "sentence_nr": 0 + "score": 0.5631664732610485, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.24285172240675165, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.1714827465806386, - "sentence_nr": 0 + "score": 0.4655392375590772, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.20461279328052204, - "sentence_nr": 0 + "score": 0.29796912700911177, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.40700264333409225, - "sentence_nr": 0 + "score": 0.5158892363484622, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.022279489478813384, - "sentence_nr": 0 + "score": 0.4005296397635166, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.1674529343985772, - "sentence_nr": 0 + "score": 0.6201785376974677, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.15383749998898477, - "sentence_nr": 0 + "score": 0.07276375309803214, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.31499362867220904, - "sentence_nr": 0 + "score": 0.38861839385008856, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0294817052888944, - "sentence_nr": 0 + "score": 0.17377261603583774, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.2003567940058514, - "sentence_nr": 0 + "score": 0.4342710497791623, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.023921714345296125, - "sentence_nr": 0 + "score": 0.15956483578595942, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.19521241186114444, - "sentence_nr": 0 + "score": 0.425693420655628, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.42252912000328696, - "sentence_nr": 0 + "score": 0.35205535634937346, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6107603222807394, - "sentence_nr": 0 + "score": 0.5769772651090223, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.33631398011857205, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.15710835559719724, - "sentence_nr": 0 + "score": 0.6332428715049205, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.2323385180696658, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5019509292309764, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.24624631147355844, - "sentence_nr": 0 + "score": 0.21108332811806296, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4245128586654577, - "sentence_nr": 0 + "score": 0.5847750744232335, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2453238227047589, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.05997773337422933, - "sentence_nr": 0 + "score": 0.55017080577881, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.15594400017066484, - "sentence_nr": 0 + "score": 0.22952177306405494, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.35666553322400163, - "sentence_nr": 0 + "score": 0.5279520952576137, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4763809450534613, - "sentence_nr": 0 + "score": 0.3520774812078196, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6797378130452167, - "sentence_nr": 0 + "score": 0.5735788202105873, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.36983487280597815, - "sentence_nr": 0 + "score": 0.2063529291350913, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5775204256764592, - "sentence_nr": 0 + "score": 0.41364248023079064, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3926191044336021, - "sentence_nr": 0 + "score": 0.3618488169166299, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5853598001081626, - "sentence_nr": 0 + "score": 0.5708179622131996, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.392855031610931, - "sentence_nr": 0 + "score": 0.1506914981676572, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5596297716783123, - "sentence_nr": 0 + "score": 0.49409850038698094, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3242365809732156, - "sentence_nr": 0 + "score": 0.10586140133972588, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.49383937848408926, - "sentence_nr": 0 + "score": 0.4674053477944039, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3757402904418656, - "sentence_nr": 0 + "score": 0.1712766252338756, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5824540571269318, - "sentence_nr": 0 + "score": 0.5225554962608486, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.48799726436702184, - "sentence_nr": 0 + "score": 0.2797290030028961, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6640059364745422, - "sentence_nr": 0 + "score": 0.5092945860838002, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.4657084160452196, - "sentence_nr": 0 + "score": 0.2703645496410475, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6161150236110055, - "sentence_nr": 0 + "score": 0.5129310433304475, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.025129524427934438, - "sentence_nr": 0 + "score": 0.2709079038456153, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.18707827466681354, - "sentence_nr": 0 + "score": 0.447458019441992, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.5523722075163756, - "sentence_nr": 0 + "score": 0.26036802768146033, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.7123995987021648, - "sentence_nr": 0 + "score": 0.5255752089611478, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.003982293944449671, - "sentence_nr": 0 + "score": 0.2492031334256811, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.0970559370254647, - "sentence_nr": 0 + "score": 0.4923163374806021, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.4340730821892422, - "sentence_nr": 0 + "score": 0.2907608105126149, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.586288470842118, - "sentence_nr": 0 + "score": 0.5445465034944268, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4865718767050507, - "sentence_nr": 0 + "score": 0.240340920378981, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6519330394001581, - "sentence_nr": 0 + "score": 0.4624667456597986, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4961186750382622, - "sentence_nr": 0 + "score": 0.43104504141832617, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6420056154822653, - "sentence_nr": 0 + "score": 0.5953439401847398, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4047209070683015, - "sentence_nr": 0 + "score": 0.27907188689389983, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5746231903096143, - "sentence_nr": 0 + "score": 0.5093017176589221, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.4349265118717251, - "sentence_nr": 0 + "score": 0.35592474790742606, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6182013213050582, - "sentence_nr": 0 + "score": 0.5565115125775245, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.32844247965411666, - "sentence_nr": 0 + "score": 0.20863984464930022, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5261499162036236, - "sentence_nr": 0 + "score": 0.45879801940552783, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.41833670648048593, - "sentence_nr": 0 + "score": 0.2695149221768555, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5894419890247544, - "sentence_nr": 0 + "score": 0.4713033964653895, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.5093380551269019, - "sentence_nr": 0 + "score": 0.27075075499555246, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6691377633118654, - "sentence_nr": 0 + "score": 0.5201548999535662, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.4749320079832654, - "sentence_nr": 0 + "score": 0.27338789256007584, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6616558999065059, - "sentence_nr": 0 + "score": 0.5429269981031598, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.2075953797357176, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4344742362498603, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.46925131548107546, - "sentence_nr": 0 + "score": 0.2666372228396489, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6524942397625267, - "sentence_nr": 0 + "score": 0.5839132669613946, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.21825699659573294, - "sentence_nr": 0 + "score": 0.3703971546860334, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.4251389388937374, - "sentence_nr": 0 + "score": 0.6509854048597393, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.22342876267947934, - "sentence_nr": 0 + "score": 0.3020089249326176, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.46128579490017735, - "sentence_nr": 0 + "score": 0.5666791239956741, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.20522978206415157, - "sentence_nr": 0 + "score": 0.2667836062177809, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4816367810257562, - "sentence_nr": 0 + "score": 0.4889374373828587, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2607066928529267, - "sentence_nr": 0 + "score": 0.1515551103099189, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5190821165076681, - "sentence_nr": 0 + "score": 0.49455791760408774, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3651499702707945, - "sentence_nr": 0 + "score": 0.1059786102229136, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.539793217489328, - "sentence_nr": 0 + "score": 0.2561557976916047, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.32231048454445776, - "sentence_nr": 0 + "score": 0.23919877618601593, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5391040134655213, - "sentence_nr": 0 + "score": 0.5302876334280949, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.24664751641319077, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.1775108912093685, - "sentence_nr": 0 + "score": 0.48702383483350364, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2103019561790119, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.19086386208696812, - "sentence_nr": 0 + "score": 0.4375454771782611, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.27075549023715834, - "sentence_nr": 0 + "score": 0.3556610867487636, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5063680799048665, - "sentence_nr": 0 + "score": 0.5896623713361566, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.1518704145788631, - "sentence_nr": 0 + "score": 0.43209473956081024, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4226430284557875, - "sentence_nr": 0 + "score": 0.6466471725002415, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.343734330975999, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.17966694002432385, - "sentence_nr": 0 + "score": 0.5961090979865409, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.29041968071488244, - "sentence_nr": 0 + "score": 0.260409852867913, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.5200872448265565, - "sentence_nr": 0 + "score": 0.4693600515228538, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.21544027588567594, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.002624671916010499, - "sentence_nr": 0 + "score": 0.4576381595573422, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.13211758854099576, - "sentence_nr": 0 + "score": 0.2545286403887288, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4082004182520704, - "sentence_nr": 0 + "score": 0.46127229234959366, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.4416835863595156, - "sentence_nr": 0 + "score": 0.24874987153684608, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.623117008858419, - "sentence_nr": 0 + "score": 0.4814988208653403, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.4249436481722545, - "sentence_nr": 0 + "score": 0.4140011428776289, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.6187144317500936, - "sentence_nr": 0 + "score": 0.6412021306400884, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.3867569653562107, - "sentence_nr": 0 + "score": 0.3759002268420169, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5709420484876131, - "sentence_nr": 0 + "score": 0.6120997127625288, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.4321751051142018, - "sentence_nr": 0 + "score": 0.17716893523927718, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.627219234526359, - "sentence_nr": 0 + "score": 0.3125133953892873, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.3093560853993581, - "sentence_nr": 0 + "score": 0.22843578925939137, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.5175656995600133, - "sentence_nr": 0 + "score": 0.39333887911230325, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.37381473043548746, - "sentence_nr": 0 + "score": 0.19035778476657209, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.5975179598905689, - "sentence_nr": 0 + "score": 0.32011375391986463, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.46916325232132, - "sentence_nr": 0 + "score": 0.21529598963807312, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.6201105534360691, - "sentence_nr": 0 + "score": 0.47472255443386435, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.4355769714618406, - "sentence_nr": 0 + "score": 0.19319522417917573, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5931050934922778, - "sentence_nr": 0 + "score": 0.5294666692683903, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.2664145897759877, - "sentence_nr": 0 + "score": 0.1876442538016413, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.4652749975820786, - "sentence_nr": 0 + "score": 0.45717296303154553, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.5404337589009207, - "sentence_nr": 0 + "score": 0.22319344534343544, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.6743788008091396, - "sentence_nr": 0 + "score": 0.47255822473411646, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3431794518924713, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.023087320015249598, - "sentence_nr": 0 + "score": 0.5291073153069198, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.48139810095340524, - "sentence_nr": 0 + "score": 0.2075953797357176, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.6662472676876138, - "sentence_nr": 0 + "score": 0.418796448457094, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.621898873312397, - "sentence_nr": 0 + "score": 0.17593291675420053, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.7757345897028827, - "sentence_nr": 0 + "score": 0.45966885600223345, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.5494249598159933, - "sentence_nr": 0 + "score": 0.30749506855677367, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.7465246513770903, - "sentence_nr": 0 + "score": 0.5615365420131465, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.5335140114876958, - "sentence_nr": 0 + "score": 0.2539342198718324, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.7053320460577175, - "sentence_nr": 0 + "score": 0.48976692911803554, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.5119388728106423, - "sentence_nr": 0 + "score": 0.26939482991021874, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.7182655499139301, - "sentence_nr": 0 + "score": 0.564348572305916, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.4899631202302039, - "sentence_nr": 0 + "score": 0.28232804221956187, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.7271470388040862, - "sentence_nr": 0 + "score": 0.639242930472136, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.5895359597121981, - "sentence_nr": 0 + "score": 0.24677721152898274, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.7417056216737207, - "sentence_nr": 0 + "score": 0.5655655793718459, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.634306550586776, - "sentence_nr": 0 + "score": 0.177282908048097, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.8018062683769096, - "sentence_nr": 0 + "score": 0.5048008630035653, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.6491865905765292, - "sentence_nr": 0 + "score": 0.24609114091724077, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.7886752509790889, - "sentence_nr": 0 + "score": 0.5163247162943534, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.20586736678432452, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5290915360201753, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.6466026323731634, - "sentence_nr": 0 + "score": 0.05670064571372339, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.813492453726107, - "sentence_nr": 0 + "score": 0.16937000725041657, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.11746179377391347, - "sentence_nr": 0 + "score": 0.4219264367109449, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.40500326360451494, - "sentence_nr": 0 + "score": 0.5790052627496669, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.43873222664111144, - "sentence_nr": 0 + "score": 0.21550905403743137, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.6675706911715913, - "sentence_nr": 0 + "score": 0.44719679117350436, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.1659862741557369, - "sentence_nr": 0 + "score": 0.19946335945716726, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.4884440880714965, - "sentence_nr": 0 + "score": 0.5412386252302255, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.40269672228447434, - "sentence_nr": 0 + "score": 0.27966169949383496, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.6225404903248234, - "sentence_nr": 0 + "score": 0.537239861484062, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.3695995811393786, - "sentence_nr": 0 + "score": 0.32980384185673844, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.6148303949607244, - "sentence_nr": 0 + "score": 0.6175883753955328, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.4352046882668779, - "sentence_nr": 0 + "score": 0.14382854899355546, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.6480277237944805, - "sentence_nr": 0 + "score": 0.26400383568118985, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.2749090602792788, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.5428290107991267, - "sentence_nr": 0 + "score": 0.21514404656488983, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.43090922851400165, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.6435927083190817, - "sentence_nr": 0 + "score": 0.22551384015559367, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.5515333710683049, - "sentence_nr": 0 + "score": 0.30538385012782954, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.7161329488530405, - "sentence_nr": 0 + "score": 0.5121153023805728, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.5034431110377162, - "sentence_nr": 0 + "score": 0.21889549804942124, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.7060517244920654, - "sentence_nr": 0 + "score": 0.3940841212708787, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.1882889817107982, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3522812586532728, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.5515333710683049, - "sentence_nr": 0 + "score": 0.3618488169166299, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.7161440924360718, - "sentence_nr": 0 + "score": 0.6178847628712388, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.11143214500495838, - "sentence_nr": 0 + "score": 0.431319746325093, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.4461426742236597, - "sentence_nr": 0 + "score": 0.6234382849939584, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.10027952357557399, - "sentence_nr": 0 + "score": 0.3161306379595585, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.4397450208902438, - "sentence_nr": 0 + "score": 0.6012304838142994, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.2917184142654506, - "sentence_nr": 0 + "score": 0.31487248334376844, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5624937546502969, - "sentence_nr": 0 + "score": 0.5635244346599635, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.2373642291509686, - "sentence_nr": 0 + "score": 0.22897967367089514, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.521644947712484, - "sentence_nr": 0 + "score": 0.5334911242844559, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.2712572779797431, - "sentence_nr": 0 + "score": 0.26709890828869226, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5422335579149541, - "sentence_nr": 0 + "score": 0.5042111985234817, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.3013230432873079, - "sentence_nr": 0 + "score": 0.4218999224827276, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5337387225243135, - "sentence_nr": 0 + "score": 0.6489282208332532, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.2513195864511859, - "sentence_nr": 0 + "score": 0.26356793966181546, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5265625099865896, - "sentence_nr": 0 + "score": 0.5191302272110829, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.2870712623899153, - "sentence_nr": 0 + "score": 0.20298700573422315, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5282814167115877, - "sentence_nr": 0 + "score": 0.3905231106721993, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.3551592549479943, - "sentence_nr": 0 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.5657145174510132, - "sentence_nr": 0 + "score": 0.5746238432846977, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.21555439980902089, - "sentence_nr": 0 + "score": 0.34637568582379935, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.4547821375357056, - "sentence_nr": 0 + "score": 0.5514391895148156, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.26704844930269833, - "sentence_nr": 0 + "score": 0.24720511037119816, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.5046361305048248, - "sentence_nr": 0 + "score": 0.4462551342337241, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.37842676092304117, - "sentence_nr": 0 + "score": 0.23380867598952562, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.6082386652087882, - "sentence_nr": 0 + "score": 0.4731313764465835, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.2194742003215349, - "sentence_nr": 0 + "score": 0.32326983669535764, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.44824310405856826, - "sentence_nr": 0 + "score": 0.5757950493268048, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.20824983259105864, - "sentence_nr": 0 + "score": 0.2920934313715234, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.467721519642842, - "sentence_nr": 0 + "score": 0.4983574989743429, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.42143379809685383, - "sentence_nr": 0 + "score": 0.1529466247397943, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5946121916131629, - "sentence_nr": 0 + "score": 0.4275296567755792, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.4141553414774169, - "sentence_nr": 0 + "score": 0.309848051124064, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5906263169622974, - "sentence_nr": 0 + "score": 0.5622431891031534, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.20087168885945464, - "sentence_nr": 0 + "score": 0.2516768028374535, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.38939667381078735, - "sentence_nr": 0 + "score": 0.5052262603078841, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.2472558107415106, - "sentence_nr": 0 + "score": 0.2380050699329688, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.48369132925944186, - "sentence_nr": 0 + "score": 0.48227504945496735, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.2741125110896123, - "sentence_nr": 0 + "score": 0.18624263881830802, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.4729834657756108, - "sentence_nr": 0 + "score": 0.4914113027832365, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.2209042538614563, - "sentence_nr": 0 + "score": 0.2395446927992299, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.4506672688399074, - "sentence_nr": 0 + "score": 0.4721484222602001, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.4735425104795809, - "sentence_nr": 0 + "score": 0.20251299853063762, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.6471388802003065, - "sentence_nr": 0 + "score": 0.36192674925462354, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.3765436233955492, - "sentence_nr": 0 + "score": 0.12015228994776961, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.5846836525025664, - "sentence_nr": 0 + "score": 0.31437848676811814, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.14346012973809613, - "sentence_nr": 0 + "score": 0.17979384730979156, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.3278530649792867, - "sentence_nr": 0 + "score": 0.331355254735914, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.44161578383393324, - "sentence_nr": 0 + "score": 0.3291256332376796, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.6271889973227618, - "sentence_nr": 0 + "score": 0.5670250015789864, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.020710959564793303, - "sentence_nr": 0 + "score": 0.3422882142242731, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.17448216344563025, - "sentence_nr": 0 + "score": 0.5278861608717469, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.2803509486829134, - "sentence_nr": 0 + "score": 0.38564863816921563, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.4797524025621454, - "sentence_nr": 0 + "score": 0.4887006722841345, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.19552775795890473, - "sentence_nr": 0 + "score": 0.2489574113984516, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.3925483761400883, - "sentence_nr": 0 + "score": 0.5438702135465744, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.24101134936111826, - "sentence_nr": 0 + "score": 0.25119117418063647, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.42220180022908466, - "sentence_nr": 0 + "score": 0.5358947011982449, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.22800980663874482, - "sentence_nr": 0 + "score": 0.24117223077042385, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.4539695239053247, - "sentence_nr": 0 + "score": 0.512020635779483, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.21943479669895977, - "sentence_nr": 0 + "score": 0.23020656163897005, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.43919819731535836, - "sentence_nr": 0 + "score": 0.5608590094117443, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2988707080433144, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.15018919509760256, - "sentence_nr": 0 + "score": 0.5286791480233601, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.29215021962379045, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.1540492458272462, - "sentence_nr": 0 + "score": 0.5527751145536495, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.27133492648358953, - "sentence_nr": 0 + "score": 0.1931328662607509, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4876773846207858, - "sentence_nr": 0 + "score": 0.43550456875371113, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.2637525786875995, - "sentence_nr": 0 + "score": 0.3394516832204828, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.44971099880168447, - "sentence_nr": 0 + "score": 0.5487992573856032, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.20801258614305904, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4082367628634589, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.29126430238399986, - "sentence_nr": 0 + "score": 0.29222881654408056, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.48487518930512413, - "sentence_nr": 0 + "score": 0.6120984237392771, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0493406602622155, - "sentence_nr": 0 + "score": 0.30956660793759877, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.23818177576740765, - "sentence_nr": 0 + "score": 0.6188773222172356, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.09711337436010993, - "sentence_nr": 0 + "score": 0.14588825992287732, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.2876823823317057, - "sentence_nr": 0 + "score": 0.39984326863280045, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.2364242732935431, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.5166025885857578, - "sentence_nr": 0 + "score": 0.38935973617512226, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.21954964295787202, - "sentence_nr": 0 + "score": 0.09100730294865149, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.48436759393641593, - "sentence_nr": 0 + "score": 0.4152991006861775, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.5054426458074261, - "sentence_nr": 0 + "score": 0.12576299804399627, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.6722694706437392, - "sentence_nr": 0 + "score": 0.4275740936545043, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.39300609312334356, - "sentence_nr": 0 + "score": 0.16421603133867055, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.6127424804854223, - "sentence_nr": 0 + "score": 0.29137424728903016, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.3677920388643988, - "sentence_nr": 0 + "score": 0.20812209921683228, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.5936461766226937, - "sentence_nr": 0 + "score": 0.31687414190905666, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.38663642984572166, - "sentence_nr": 0 + "score": 0.16498223460029865, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.6246274082500232, - "sentence_nr": 0 + "score": 0.2972734576062982, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.5306942251812361, - "sentence_nr": 0 + "score": 0.274614810062371, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.7085606774707288, - "sentence_nr": 0 + "score": 0.5281783547748619, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.18452698284010527, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.000841750841750842, - "sentence_nr": 0 + "score": 0.37129342404244153, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.1649362336939456, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.36964185672093963, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.5173149058064286, - "sentence_nr": 0 + "score": 0.34396207830145586, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.7076921197210724, - "sentence_nr": 0 + "score": 0.5775887851128505, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.3348485220754285, - "sentence_nr": 0 + "score": 0.30371045098471633, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.5526454143704483, - "sentence_nr": 0 + "score": 0.6140790369362206, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.4490860834534268, - "sentence_nr": 0 + "score": 0.2252297536658673, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.6673191432059211, - "sentence_nr": 0 + "score": 0.437729946490623, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.33382920003857136, - "sentence_nr": 0 + "score": 0.3004961314114194, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.5343019280932326, - "sentence_nr": 0 + "score": 0.6112720381807045, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.3885821466849501, - "sentence_nr": 0 + "score": 0.2668575997365348, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.5985448528428169, - "sentence_nr": 0 + "score": 0.5174669930427155, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.35162367832688185, - "sentence_nr": 0 + "score": 0.25289636204048427, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.5470403853789135, - "sentence_nr": 0 + "score": 0.55030569340461, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.3690613106650631, - "sentence_nr": 0 + "score": 0.2247327109713433, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.5679268631651113, - "sentence_nr": 0 + "score": 0.5250140675378029, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.36458100799846727, - "sentence_nr": 0 + "score": 0.3378883984281531, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.556692655226023, - "sentence_nr": 0 + "score": 0.6049772225333672, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.3922851712831046, - "sentence_nr": 0 + "score": 0.2761659300730445, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.6222668712396683, - "sentence_nr": 0 + "score": 0.5565926641426052, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.3615358594548135, - "sentence_nr": 0 + "score": 0.13384453331197527, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.5764831888443301, - "sentence_nr": 0 + "score": 0.4151425963129396, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.2709924119064872, - "sentence_nr": 0 + "score": 0.2276261087372084, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.4882782050991019, - "sentence_nr": 0 + "score": 0.5006338961901005, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.22089230502491558, - "sentence_nr": 0 + "score": 0.20109176688134525, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.4677450306632732, - "sentence_nr": 0 + "score": 0.5208655725098277, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.3192042127191187, - "sentence_nr": 0 + "score": 0.3003653956261136, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.5199223947442019, - "sentence_nr": 0 + "score": 0.5819235916814075, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", + "metric": "bleu", + "score": 0.34009641866679796, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.0015001500150015, - "sentence_nr": 0 + "score": 0.6387903483458015, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.39061033693599795, - "sentence_nr": 0 + "score": 0.28509173779340485, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.5825589467646037, - "sentence_nr": 0 + "score": 0.5531716447251654, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.25066959615472983, - "sentence_nr": 0 + "score": 0.14651860136741404, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.4464863544842361, - "sentence_nr": 0 + "score": 0.26874220962782625, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.24634920227044405, - "sentence_nr": 0 + "score": 0.3014199920541698, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.4663964950094987, - "sentence_nr": 0 + "score": 0.37258990587027996, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.2030779777377279, - "sentence_nr": 0 + "score": 0.2169400845409205, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.433265414942881, - "sentence_nr": 0 + "score": 0.352650085718584, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.18149017327984754, - "sentence_nr": 0 + "score": 0.280867833557141, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.40703450831310045, - "sentence_nr": 0 + "score": 0.47682234542802715, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.13329022775771593, - "sentence_nr": 0 + "score": 0.3195214890612964, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.3376146076257409, - "sentence_nr": 0 + "score": 0.6238377764870237, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.058929736139753434, - "sentence_nr": 0 + "score": 0.2026004770366011, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.264080458624048, - "sentence_nr": 0 + "score": 0.4124307729296919, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.3173813158730172, - "sentence_nr": 0 + "score": 0.3479857106948536, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.4928463609252142, - "sentence_nr": 0 + "score": 0.5434098077482219, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.305374145776946, - "sentence_nr": 0 + "score": 0.127094130129695, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.510534704480906, - "sentence_nr": 0 + "score": 0.37183060884198066, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.1616475408517619, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2842078929375233, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.20642594358613336, - "sentence_nr": 0 + "score": 0.22839293770911745, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.4167457729059204, - "sentence_nr": 0 + "score": 0.4657355446143013, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.1709913567536511, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.010856884256461188, - "sentence_nr": 0 + "score": 0.29449196775233905, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.23736991525740553, - "sentence_nr": 0 + "score": 0.13435637642994447, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.41867456253450896, - "sentence_nr": 0 + "score": 0.34277719024611025, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.3969548673353603, - "sentence_nr": 0 + "score": 0.19606965736186524, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.6084494342072353, - "sentence_nr": 0 + "score": 0.5029030066686957, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.44834209038718303, - "sentence_nr": 0 + "score": 0.19598322445625943, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.6192927072328505, - "sentence_nr": 0 + "score": 0.46665751191230503, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.4059702785610718, - "sentence_nr": 0 + "score": 0.21170876705481304, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5924126044868774, - "sentence_nr": 0 + "score": 0.5213888058464138, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.4845230115211525, - "sentence_nr": 0 + "score": 0.2104347389999275, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.6342072643481442, - "sentence_nr": 0 + "score": 0.41820208790180724, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.32203015680943314, - "sentence_nr": 0 + "score": 0.33893526679717595, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.529821905278818, - "sentence_nr": 0 + "score": 0.5305909471293387, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.22666412589302412, - "sentence_nr": 0 + "score": 0.4005296397635166, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.40566177288010574, - "sentence_nr": 0 + "score": 0.5780131186067837, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.5013254539312615, - "sentence_nr": 0 + "score": 0.2534684260065973, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.667586765919732, - "sentence_nr": 0 + "score": 0.465022490109088, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.449699837581857, - "sentence_nr": 0 + "score": 0.28341626687166926, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5866210246057594, - "sentence_nr": 0 + "score": 0.4981912496496188, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.18787234368655517, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.43638553308108674, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.4570312379742113, - "sentence_nr": 0 + "score": 0.19035778476657214, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.6515364604825435, - "sentence_nr": 0 + "score": 0.3275220698724237, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.06170226864451068, - "sentence_nr": 0 + "score": 0.2139885278593109, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.2910964368746218, - "sentence_nr": 0 + "score": 0.5273959990464491, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.2303962170230739, - "sentence_nr": 0 + "score": 0.16558141211628247, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.4892587840921042, - "sentence_nr": 0 + "score": 0.37532912975144084, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.4540422742824559, - "sentence_nr": 0 + "score": 0.11012419619306524, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.6751320303512911, - "sentence_nr": 0 + "score": 0.4937984099457621, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.5339026027654551, - "sentence_nr": 0 + "score": 0.14599223028360678, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.7303050277242, - "sentence_nr": 0 + "score": 0.4828499846637324, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.45382991587984656, - "sentence_nr": 0 + "score": 0.12579787892324615, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.6795124822993059, - "sentence_nr": 0 + "score": 0.46133126472684716, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.454110885283082, - "sentence_nr": 0 + "score": 0.1281637706417447, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.6757624175597331, - "sentence_nr": 0 + "score": 0.22833109825855033, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.4142879154157889, - "sentence_nr": 0 + "score": 0.25500119387217685, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.6227005613083273, - "sentence_nr": 0 + "score": 0.42217126960650364, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.3673143858550097, - "sentence_nr": 0 + "score": 0.29264105234089743, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.5999948756290627, - "sentence_nr": 0 + "score": 0.36844739850003594, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5933908752486619, - "sentence_nr": 0 + "score": 0.38249626297768063, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7521076687486573, - "sentence_nr": 0 + "score": 0.40976234193505356, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5389195071775074, - "sentence_nr": 0 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7120805996785787, - "sentence_nr": 0 + "score": 0.7882997401328445, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.5806197937310393, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.7346706700987636, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.5469323345187914, - "sentence_nr": 0 + "score": 0.08635800047213174, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7053301383823619, - "sentence_nr": 0 + "score": 0.218109371254876, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2232040735029205, - "sentence_nr": 0 + "score": 0.25552199116069907, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5082535342764724, - "sentence_nr": 0 + "score": 0.3799133205289109, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2603408634436383, - "sentence_nr": 0 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5712310736675958, - "sentence_nr": 0 + "score": 0.3682311523733465, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5642761727828352, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.1518030911347623, - "sentence_nr": 0 + "score": 0.6181373706707737, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.03947087289497203, - "sentence_nr": 0 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.24871772534163297, - "sentence_nr": 0 + "score": 0.512762518189388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.15503659808176187, - "sentence_nr": 0 + "score": 0.6931369519059803, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.1587741341654334, - "sentence_nr": 0 + "score": 0.35447530946908884, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4577275269488853, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.16240427228171622, - "sentence_nr": 0 + "score": 0.6747054474171109, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.25383339228798274, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.15821359701915677, - "sentence_nr": 0 + "score": 0.45896379476820603, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.15138514598766048, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.1999554138760155, - "sentence_nr": 0 + "score": 0.3237497764315872, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.27668736912821895, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.15373049900664998, - "sentence_nr": 0 + "score": 0.4414406760568898, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.17200767571780612, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3723150838362789, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.15604242268653643, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.15710519562472744, - "sentence_nr": 0 + "score": 0.2255928425212252, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.16203519855642107, - "sentence_nr": 0 + "score": 0.6159319815107203, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.15948434818675836, - "sentence_nr": 0 + "score": 0.28685201698226354, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.0019920318725099606, - "sentence_nr": 0 + "score": 0.3088155734423375, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3259608048468566, - "sentence_nr": 0 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.546117067949716, - "sentence_nr": 0 + "score": 0.78479833664205, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.1658740169858733, - "sentence_nr": 0 + "score": 0.3254455687469726, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4024808935109278, - "sentence_nr": 0 + "score": 0.4474512036484817, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.20156158538354524, - "sentence_nr": 0 + "score": 0.353203510510529, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4362539345951223, - "sentence_nr": 0 + "score": 0.4910213297498164, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.16957342631054367, - "sentence_nr": 0 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4366938165395205, - "sentence_nr": 0 + "score": 0.5820265218174012, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.139369666953945, - "sentence_nr": 0 + "score": 0.23887527917609022, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3719644893339326, - "sentence_nr": 0 + "score": 0.4120359948636439, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.312443507311478, - "sentence_nr": 0 + "score": 0.23660362391696813, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5626555260532313, - "sentence_nr": 0 + "score": 0.34152697838249696, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.280153937179511, - "sentence_nr": 0 + "score": 0.580451128369423, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5315975363727116, - "sentence_nr": 0 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.580451128369423, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.728208634600343, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.25850597630381367, - "sentence_nr": 0 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5200460018701456, - "sentence_nr": 0 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.00503842243804861, - "sentence_nr": 0 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.07086730383931879, - "sentence_nr": 0 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.22826305620809492, - "sentence_nr": 0 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.41816062347701055, - "sentence_nr": 0 + "score": 1.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.28031528470622435, - "sentence_nr": 0 + "score": 0.34589895849033114, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5080912630709646, - "sentence_nr": 0 + "score": 0.44792042673107413, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.43161598042102073, - "sentence_nr": 0 + "score": 0.47320724783393625, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.575098943836209, - "sentence_nr": 0 + "score": 0.5833006006517599, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.23322806032691942, - "sentence_nr": 0 + "score": 0.3556521383601747, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4970157115640211, - "sentence_nr": 0 + "score": 0.594830811413066, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.18831548712362461, - "sentence_nr": 0 + "score": 0.19984607356962125, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4627923572183501, - "sentence_nr": 0 + "score": 0.29326031481052006, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.2774144264403638, - "sentence_nr": 0 + "score": 0.08939270118279458, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4953240887614079, - "sentence_nr": 0 + "score": 0.2952752522340665, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.2542725044959704, - "sentence_nr": 0 + "score": 0.21629114799587432, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.47939306548471916, - "sentence_nr": 0 + "score": 0.3542320138389837, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.5179684763994646, - "sentence_nr": 0 + "score": 0.27274191069381915, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6594622476886304, - "sentence_nr": 0 + "score": 0.37436438971100644, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.508262827374593, - "sentence_nr": 0 + "score": 0.580451128369423, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.6484865815175519, - "sentence_nr": 0 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.580451128369423, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.728208634600343, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.466089579180166, - "sentence_nr": 0 + "score": 0.2434330428491034, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6166427184736345, - "sentence_nr": 0 + "score": 0.31858900384957733, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0894726048800864, - "sentence_nr": 0 + "score": 0.583526016818016, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.32040245674130735, - "sentence_nr": 0 + "score": 0.6994652193905146, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.30154136324161096, - "sentence_nr": 0 + "score": 0.27405612859390877, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5214624883135771, - "sentence_nr": 0 + "score": 0.4639958592456083, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.32434551072073575, - "sentence_nr": 0 + "score": 0.24007528246707907, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5461576315951293, - "sentence_nr": 0 + "score": 0.31084467045503017, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.417842986003915, - "sentence_nr": 0 + "score": 0.6052987576779449, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6235945624226917, - "sentence_nr": 0 + "score": 0.643602170728296, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.4684582258807146, - "sentence_nr": 0 + "score": 0.13004800471424346, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6509870807795504, - "sentence_nr": 0 + "score": 0.28217142159025543, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4684582258807146, - "sentence_nr": 0 + "score": 0.3674668904964848, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6509870807795504, - "sentence_nr": 0 + "score": 0.40975628086142124, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2755812609839064, - "sentence_nr": 0 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5308706218209139, - "sentence_nr": 0 + "score": 0.5785251190053333, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.36065675483488874, - "sentence_nr": 0 + "score": 0.37821486365532614, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5848675866219151, - "sentence_nr": 0 + "score": 0.4718665834023439, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4628530384876385, - "sentence_nr": 0 + "score": 0.3763743474188506, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6179864674944966, - "sentence_nr": 0 + "score": 0.4120099199050514, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4307886337606128, - "sentence_nr": 0 + "score": 0.38694317759010316, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6064479768085549, - "sentence_nr": 0 + "score": 0.45827711860455167, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3699382260470039, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.09069594593173218, - "sentence_nr": 0 + "score": 0.4032851361478274, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4716885638382066, - "sentence_nr": 0 + "score": 0.23270804908165135, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6714009080674107, - "sentence_nr": 0 + "score": 0.3478589640284733, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4115167991342047, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.37155017280771785, - "sentence_nr": 0 + "score": 0.5649900101054287, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4025210047052182, - "sentence_nr": 0 + "score": 0.45167594566243024, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5985194736943847, - "sentence_nr": 0 + "score": 0.5169677927619225, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.37892189586155534, - "sentence_nr": 0 + "score": 0.22453002699007485, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5908052258359918, - "sentence_nr": 0 + "score": 0.3354597455808525, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3354557799221337, - "sentence_nr": 0 + "score": 0.24489516889906388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5828616357620534, - "sentence_nr": 0 + "score": 0.409369762090413, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.38540591321276524, - "sentence_nr": 0 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.6091815498132347, - "sentence_nr": 0 + "score": 0.3780009826926042, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.36977252605532496, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5998039337990848, - "sentence_nr": 0 + "score": 0.18341524527739528, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3322929293409608, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5804214136040975, - "sentence_nr": 0 + "score": 0.3989952325675248, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.32948276574633206, - "sentence_nr": 0 + "score": 0.3925121365052661, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5579608060488221, - "sentence_nr": 0 + "score": 0.47788592802001717, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.40748672718731094, - "sentence_nr": 0 + "score": 0.12789533377801793, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6185219751228138, - "sentence_nr": 0 + "score": 0.2283763803651714, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.41391222112524345, - "sentence_nr": 0 + "score": 0.15896519992112562, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6185000018140083, - "sentence_nr": 0 + "score": 0.29513999460654694, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.1423412184218882, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2596718628394258, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.37444352680714255, - "sentence_nr": 0 + "score": 0.3571150500823898, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5777605467565269, - "sentence_nr": 0 + "score": 0.44642876819396304, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.215971435415919, - "sentence_nr": 0 + "score": 0.3571150500823898, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.43504055478394826, - "sentence_nr": 0 + "score": 0.5018967494794737, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.36990978753067677, - "sentence_nr": 0 + "score": 0.3572188192648703, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5650271917718249, - "sentence_nr": 0 + "score": 0.45381175288762937, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.2913871477484173, - "sentence_nr": 0 + "score": 0.08968235248346597, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4778519392615073, - "sentence_nr": 0 + "score": 0.2175311081388801, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3859000637680225, - "sentence_nr": 0 + "score": 0.10682827247639556, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5858315307170925, - "sentence_nr": 0 + "score": 0.21551117313912851, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3738636268027588, - "sentence_nr": 0 + "score": 0.07425055521504613, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5633496484582216, - "sentence_nr": 0 + "score": 0.18122341046764998, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.4084190982281481, - "sentence_nr": 0 + "score": 0.1472462377094902, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6045916299793137, - "sentence_nr": 0 + "score": 0.30525310195831357, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.23598932454609683, - "sentence_nr": 0 + "score": 0.2774527633525211, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.45933330952484963, - "sentence_nr": 0 + "score": 0.4358323759361012, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.33832642802358825, - "sentence_nr": 0 + "score": 0.1978585723043446, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5469625730166827, - "sentence_nr": 0 + "score": 0.3527599187160617, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.2906205269998709, - "sentence_nr": 0 + "score": 0.31314224813827346, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4738555693911765, - "sentence_nr": 0 + "score": 0.3932583887521134, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.28272487839661065, - "sentence_nr": 0 + "score": 0.5199302229930708, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.46645581968232397, - "sentence_nr": 0 + "score": 0.6017481019884499, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.2523019529343173, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4406369072888057, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3725338410101549, - "sentence_nr": 0 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5606140962762308, - "sentence_nr": 0 + "score": 0.31221693968406194, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.11947542811972255, - "sentence_nr": 0 + "score": 0.19230188007838597, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.38211159102056313, - "sentence_nr": 0 + "score": 0.3407021378942239, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3582495248633888, - "sentence_nr": 0 + "score": 0.41072675483179805, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5337300736591853, - "sentence_nr": 0 + "score": 0.5635589150380774, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3850293035436385, - "sentence_nr": 0 + "score": 0.12045422179467957, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5627108155348461, - "sentence_nr": 0 + "score": 0.22184013352319704, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.36775365397595855, - "sentence_nr": 0 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5429838358858414, - "sentence_nr": 0 + "score": 0.6612342258381259, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3742787779358338, - "sentence_nr": 0 + "score": 0.3883375900135818, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5507724077862277, - "sentence_nr": 0 + "score": 0.4643731845106876, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.36166370485178767, - "sentence_nr": 0 + "score": 0.33062429129755794, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5544237122855078, - "sentence_nr": 0 + "score": 0.4887128900317842, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.22502019314147165, - "sentence_nr": 0 + "score": 0.6064630666233242, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4660050287510241, - "sentence_nr": 0 + "score": 0.6712747226800536, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.2611379210999344, - "sentence_nr": 0 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5199168221681734, - "sentence_nr": 0 + "score": 0.7123666275414222, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.3179668927459568, - "sentence_nr": 0 + "score": 0.13536681105774234, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5246182258245259, - "sentence_nr": 0 + "score": 0.23595972523293418, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.36615448670581846, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5458789775720151, - "sentence_nr": 0 + "score": 0.14728954312449322, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.2256490809237466, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.37300331821940047, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4688461566377855, - "sentence_nr": 0 + "score": 0.34791594751284466, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6590799487386328, - "sentence_nr": 0 + "score": 0.4062384532979022, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.2050862226628115, - "sentence_nr": 0 + "score": 0.7795149903947967, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4479394296115543, - "sentence_nr": 0 + "score": 0.8912732146280626, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.362962020143119, - "sentence_nr": 0 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5507900287805876, - "sentence_nr": 0 + "score": 0.6994652193905146, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2042633250999265, - "sentence_nr": 0 + "score": 0.33737554588923646, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4024854380942464, - "sentence_nr": 0 + "score": 0.369875665962309, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.21682957830342386, - "sentence_nr": 0 + "score": 0.30389058699653954, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.40328066851151617, - "sentence_nr": 0 + "score": 0.4310896909809194, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3763278728427448, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.17569336234053629, - "sentence_nr": 0 + "score": 0.4261526683335186, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3924259174695316, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.1687808278077641, - "sentence_nr": 0 + "score": 0.4246539836622663, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.39022736644855677, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.17581812142544698, - "sentence_nr": 0 + "score": 0.5202587069271436, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3672404084841361, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.1795359810750476, - "sentence_nr": 0 + "score": 0.4518744271362045, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.34908859517231833, - "sentence_nr": 0 + "score": 0.21027545940631823, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5054950855070256, - "sentence_nr": 0 + "score": 0.4572185175571455, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.142431283922237, - "sentence_nr": 0 + "score": 0.25383339228798274, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3609780899068605, - "sentence_nr": 0 + "score": 0.46123461430035645, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.16401798649868696, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3760928911869727, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.11414633188690328, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.17861900953769713, - "sentence_nr": 0 + "score": 0.2503197875391322, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.09260326033959126, - "sentence_nr": 0 + "score": 0.17807129401511626, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.025515907504753448, - "sentence_nr": 0 + "score": 0.07843772989359644, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.18744523405760177, - "sentence_nr": 0 + "score": 0.2669076052967215, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.13989491400872253, - "sentence_nr": 0 + "score": 0.18105048502088059, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.4095257685037439, - "sentence_nr": 0 + "score": 0.271054087912132, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.37645276051115606, - "sentence_nr": 0 + "score": 0.3029928206533524, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5980216031532829, - "sentence_nr": 0 + "score": 0.45262153733641225, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.22917125225310467, - "sentence_nr": 0 + "score": 0.43000007605628365, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.47903700624901113, - "sentence_nr": 0 + "score": 0.5073076708050636, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.27318782983287254, - "sentence_nr": 0 + "score": 0.16767849550785174, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5543770120341358, - "sentence_nr": 0 + "score": 0.32211607665330505, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.17948041177954654, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.479796413592652, - "sentence_nr": 0 + "score": 0.3068038152276031, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.06583739589075036, - "sentence_nr": 0 + "score": 0.17278760032513005, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.2886125811836185, - "sentence_nr": 0 + "score": 0.3872014058095359, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.3285631316988268, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.5548316779589381, - "sentence_nr": 0 + "score": 0.1032637856654291, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.2696625693704166, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4800260750132835, - "sentence_nr": 0 + "score": 0.12256115024243901, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.1667153530685962, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.31125924230690827, - "sentence_nr": 0 + "score": 0.2563322947493884, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.3225412913490767, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5444997824452831, - "sentence_nr": 0 + "score": 0.17694915875422723, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.20162477784805663, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.0013495276653171392, - "sentence_nr": 0 + "score": 0.4096636937616398, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.15376887027903216, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.38954501570553435, - "sentence_nr": 0 + "score": 0.18653722013077995, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.02228851669741669, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.16941662225476226, - "sentence_nr": 0 + "score": 0.21582567264281033, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.15925566245534395, - "sentence_nr": 0 + "score": 0.16856369030953883, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.020022039661695485, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.18319796614102749, - "sentence_nr": 0 + "score": 0.17222588358752802, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.25977796098643696, - "sentence_nr": 0 + "score": 0.17620697694486812, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.45846399974797636, - "sentence_nr": 0 + "score": 0.3349127944613063, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.21343334222861665, - "sentence_nr": 0 + "score": 0.49806572776935465, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.4071704087995375, - "sentence_nr": 0 + "score": 0.643432991222625, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.03859344502039764, - "sentence_nr": 0 + "score": 0.40515809323728763, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.18727863561096483, - "sentence_nr": 0 + "score": 0.5449200667389397, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.33330884243765846, - "sentence_nr": 0 + "score": 0.15285713412458543, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.5553752919921839, - "sentence_nr": 0 + "score": 0.24746988080227628, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.3558905194860598, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.5530965003702101, - "sentence_nr": 0 + "score": 0.22777548520508317, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.10845182904139573, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.27825798566245524, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.3629548639887491, - "sentence_nr": 0 + "score": 0.21004850229269248, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.5921544724176951, - "sentence_nr": 0 + "score": 0.3447734406041285, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.22743363869750483, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.16686725758165746, - "sentence_nr": 0 + "score": 0.4248437050568334, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.027711989596895026, - "sentence_nr": 0 + "score": 0.29579846078012384, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.18513440795413078, - "sentence_nr": 0 + "score": 0.5170279422640637, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2512712267295304, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.18337542465784618, - "sentence_nr": 0 + "score": 0.2928484110896528, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.20723903671796345, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.2738250966440318, - "sentence_nr": 0 + "score": 0.2746682387996949, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3626305461419687, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.17457751379065342, - "sentence_nr": 0 + "score": 0.36369161190356464, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2539593860148789, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.16758070996072438, - "sentence_nr": 0 + "score": 0.31852925024650675, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.22650664914012167, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.1648217566450672, - "sentence_nr": 0 + "score": 0.44021101913103755, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3039519244807058, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.14937766642643976, - "sentence_nr": 0 + "score": 0.5267184242152971, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.1539372614821037, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.39281132309216027, - "sentence_nr": 0 + "score": 0.1416851070122953, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.16643025437231165, - "sentence_nr": 0 + "score": 0.23506726943391335, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.16864925448492676, - "sentence_nr": 0 + "score": 0.16170884319006984, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2250265947708922, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.14310977738931444, - "sentence_nr": 0 + "score": 0.29110362374826554, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.30776997671170997, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.18336900353303615, - "sentence_nr": 0 + "score": 0.5302815626138546, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.044083968419034406, - "sentence_nr": 0 + "score": 0.355402294764931, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.1910937333522565, - "sentence_nr": 0 + "score": 0.4505070984023486, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.2364341169976402, - "sentence_nr": 0 + "score": 0.1411791520898124, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.44688068305416384, - "sentence_nr": 0 + "score": 0.2902470444985328, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.3275794528853699, - "sentence_nr": 0 + "score": 0.2171788734284664, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.490529412998314, - "sentence_nr": 0 + "score": 0.33899728453126426, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.3910557548848884, - "sentence_nr": 0 + "score": 0.1050176352370787, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.5640234702218941, - "sentence_nr": 0 + "score": 0.3096045806359874, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.42567034554082944, - "sentence_nr": 0 + "score": 0.35423985843000033, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.559555145699201, - "sentence_nr": 0 + "score": 0.449850771779881, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.35489774041724637, - "sentence_nr": 0 + "score": 0.5572806310452209, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.5417379433047397, - "sentence_nr": 0 + "score": 0.8076383886663636, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.19928570195743642, - "sentence_nr": 0 + "score": 0.40214612768560637, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.4190699838003337, - "sentence_nr": 0 + "score": 0.5573169779174251, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.45074421522718355, - "sentence_nr": 0 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.611234824040456, - "sentence_nr": 0 + "score": 0.46880886343224853, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.37895246763927487, - "sentence_nr": 0 + "score": 0.2782546336574456, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.5637705600813122, - "sentence_nr": 0 + "score": 0.42327771190588576, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.25982820626438285, - "sentence_nr": 0 + "score": 0.39109158855739823, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.448770635195091, - "sentence_nr": 0 + "score": 0.48247427218434713, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.3151111682106363, - "sentence_nr": 0 + "score": 0.3931807596037881, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5484564892322974, - "sentence_nr": 0 + "score": 0.3863308383621456, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.19075628574726866, - "sentence_nr": 0 + "score": 0.4185938787651429, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.41027751261677925, - "sentence_nr": 0 + "score": 0.46641278921549706, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.3104800728034583, - "sentence_nr": 0 + "score": 0.4185938787651429, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.4915887374711213, - "sentence_nr": 0 + "score": 0.46229900712285454, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.33493420443764327, - "sentence_nr": 0 + "score": 0.0842816865856957, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5494512089523403, - "sentence_nr": 0 + "score": 0.2485953030231616, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.18258051379187495, - "sentence_nr": 0 + "score": 0.21057401113505914, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.37144982797514564, - "sentence_nr": 0 + "score": 0.08678345215657841, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5566300649554314, - "sentence_nr": 0 + "score": 0.2461700872648841, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.37030779414532766, - "sentence_nr": 0 + "score": 0.26837663158331726, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.5686892684950948, - "sentence_nr": 0 + "score": 0.36155324846955933, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.29047920175774217, - "sentence_nr": 0 + "score": 0.580451128369423, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.49272880198145935, - "sentence_nr": 0 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.24965028943014114, - "sentence_nr": 0 + "score": 0.3525678415060714, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.4673190842234536, - "sentence_nr": 0 + "score": 0.543942790381318, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.41178982108320433, - "sentence_nr": 0 + "score": 0.13733894353973466, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.609042705412301, - "sentence_nr": 0 + "score": 0.2628097872401104, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.30730414574085574, - "sentence_nr": 0 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.5593126358550683, - "sentence_nr": 0 + "score": 0.7056438934239434, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.3925121365052661, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.47722987146488, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.30557926483272724, - "sentence_nr": 0 + "score": 0.3352430929236216, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5372819180558251, - "sentence_nr": 0 + "score": 0.37654805257811624, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.08180491833590103, - "sentence_nr": 0 + "score": 0.37284875432797243, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.311232289473797, - "sentence_nr": 0 + "score": 0.46330437039257283, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.3063581141268663, - "sentence_nr": 0 + "score": 0.14858713442145016, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.49046747666762064, - "sentence_nr": 0 + "score": 0.26570814480513566, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.35425134311526146, - "sentence_nr": 0 + "score": 0.2061927630692647, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.543224327229853, - "sentence_nr": 0 + "score": 0.28624131346749065, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.3446263661390609, - "sentence_nr": 0 + "score": 0.47046185035490873, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.5457062469198075, - "sentence_nr": 0 + "score": 0.6292601141059937, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.1844277711083256, - "sentence_nr": 0 + "score": 0.25983833013159885, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.39231379751700163, - "sentence_nr": 0 + "score": 0.3730064448362738, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.22458010101026027, - "sentence_nr": 0 + "score": 0.0926947735155968, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.419073948717225, - "sentence_nr": 0 + "score": 0.27152680319064787, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.16129088493851212, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.3633785507778129, - "sentence_nr": 0 + "score": 0.22886565624051966, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.0923267971632956, - "sentence_nr": 0 + "score": 0.09769805815777928, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.3309231267378018, - "sentence_nr": 0 + "score": 0.2758478148913772, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.3923760262816128, - "sentence_nr": 0 + "score": 0.15867077366552554, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.5790825048444385, - "sentence_nr": 0 + "score": 0.2515767320274808, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.44439671603383196, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.6336463934979399, - "sentence_nr": 0 + "score": 0.21300958856758825, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.032197629403649654, - "sentence_nr": 0 + "score": 0.10629625019345329, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.20237585876837602, - "sentence_nr": 0 + "score": 0.2894452751925746, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.3494358555121005, - "sentence_nr": 0 + "score": 0.5181212746323438, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.5521153931296311, - "sentence_nr": 0 + "score": 0.5589229357546774, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.04211115562584218, - "sentence_nr": 0 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.21787753877924487, - "sentence_nr": 0 + "score": 0.5295425318265925, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.18060798993874613, - "sentence_nr": 0 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.42347596848174995, - "sentence_nr": 0 + "score": 0.7155411017347171, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.10142265089946709, - "sentence_nr": 0 + "score": 0.21333164424828907, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.23765231683034127, - "sentence_nr": 0 + "score": 0.32785783444873706, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.13326254700682963, - "sentence_nr": 0 + "score": 0.4825755887985002, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.37783345527529155, - "sentence_nr": 0 + "score": 0.5834117627541725, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0836098993777203, - "sentence_nr": 0 + "score": 0.4427274357129559, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.28681946123560914, - "sentence_nr": 0 + "score": 0.48566583494323345, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.25753830733173966, - "sentence_nr": 0 + "score": 0.16312445849704404, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.21097146062542602, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.41556645044647633, - "sentence_nr": 0 + "score": 0.17349790295171436, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.12765317762547787, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.0013785497656465398, - "sentence_nr": 0 + "score": 0.20795234713221633, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.19960018464952517, - "sentence_nr": 0 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.40487558185186295, - "sentence_nr": 0 + "score": 0.39302513361762836, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.15308678224580158, - "sentence_nr": 0 + "score": 0.5136268735913038, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.3410590522933235, - "sentence_nr": 0 + "score": 0.7004219512590859, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.04145921595437363, - "sentence_nr": 0 + "score": 0.7183290415445132, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.16740774049127094, - "sentence_nr": 0 + "score": 0.2677353447271197, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.36106868107953355, - "sentence_nr": 0 + "score": 0.33809821343395446, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3414464563275225, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.1642492535825589, - "sentence_nr": 0 + "score": 0.4182681167677125, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.054611896822102844, - "sentence_nr": 0 + "score": 0.43812558475283875, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.21077174268367538, - "sentence_nr": 0 + "score": 0.5153914304432097, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.23361580096963977, - "sentence_nr": 0 + "score": 0.1022763758993479, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.4620323651475797, - "sentence_nr": 0 + "score": 0.26825052055805815, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.2464000786532921, - "sentence_nr": 0 + "score": 0.18808242155433705, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.44961038359873023, - "sentence_nr": 0 + "score": 0.3220587874741547, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.2761854595042038, - "sentence_nr": 0 + "score": 0.1022763758993479, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.505027473861755, - "sentence_nr": 0 + "score": 0.2693375138315219, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.23803104895136312, - "sentence_nr": 0 + "score": 0.08023149270718091, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.46632697077584034, - "sentence_nr": 0 + "score": 0.279740710493905, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.26526917699436564, - "sentence_nr": 0 + "score": 0.21330178332703942, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.49277477098127986, - "sentence_nr": 0 + "score": 0.4261888165527193, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.18465678525703502, - "sentence_nr": 0 + "score": 0.06534434987768793, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.4580829060349368, - "sentence_nr": 0 + "score": 0.27176767387111833, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.27522942238370274, - "sentence_nr": 0 + "score": 0.16168270317308941, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.47639859532742806, - "sentence_nr": 0 + "score": 0.26228540738738376, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.2534162743837895, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.47327726917113905, - "sentence_nr": 0 + "score": 0.305925215411119, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.2442210345943194, - "sentence_nr": 0 + "score": 0.3174603493865962, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.44602068344467516, - "sentence_nr": 0 + "score": 0.465550295868511, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.2650730112906958, - "sentence_nr": 0 + "score": 0.25430316746203985, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.46711748522451396, - "sentence_nr": 0 + "score": 0.31361769699186176, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.06394923432983099, - "sentence_nr": 0 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.32186431939465493, - "sentence_nr": 0 + "score": 0.7056438934239434, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.2147261886416508, - "sentence_nr": 0 + "score": 0.41072675483179805, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.47511420742329435, - "sentence_nr": 0 + "score": 0.5635589150380774, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.06850339366064954, - "sentence_nr": 0 + "score": 0.3514245731837287, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.2947252945949938, - "sentence_nr": 0 + "score": 0.534130899739072, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3090705808198716, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.20218909354463535, - "sentence_nr": 0 + "score": 0.4997989608278053, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.1512699697277094, - "sentence_nr": 0 + "score": 0.3552281813814547, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.3128685016104829, - "sentence_nr": 0 + "score": 0.5479990039688047, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.16920203169238196, - "sentence_nr": 0 + "score": 0.264371505578968, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.36485146073913816, - "sentence_nr": 0 + "score": 0.3692663913160793, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.21476638434350095, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.3754665454783431, - "sentence_nr": 0 + "score": 0.40358627497421223, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.07004290256012695, - "sentence_nr": 0 + "score": 0.15604242268653643, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.2672503731106106, - "sentence_nr": 0 + "score": 0.35440326623172935, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.35506855852882296, - "sentence_nr": 0 + "score": 0.13536681105774234, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.5556495329232688, - "sentence_nr": 0 + "score": 0.29163125383681, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.21052905851500206, - "sentence_nr": 0 + "score": 0.6052987576779449, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.38934548871055996, - "sentence_nr": 0 + "score": 0.6809283802101068, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.29580528518835375, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4152245863377912, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.22330682701224286, - "sentence_nr": 0 + "score": 0.108043996762779, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.4107198616872477, - "sentence_nr": 0 + "score": 0.2627337195947467, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.18085702029043885, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.2756948589881712, - "sentence_nr": 0 + "score": 0.338987683983403, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.15653267028380505, - "sentence_nr": 0 + "score": 0.12829843029207522, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.3368420125937692, - "sentence_nr": 0 + "score": 0.2926652353247206, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 + "score": 0.16516473320936778, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.07407154448063642, - "sentence_nr": 1 + "score": 0.11146727460890443, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.43145434527321425, - "sentence_nr": 1 + "score": 0.29428893607214085, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 + "score": 0.10640850690356463, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 + "score": 0.2838000569859586, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.12987293870549732, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.024459391267874976, - "sentence_nr": 1 + "score": 0.28422427146147505, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.12351824822447692, - "sentence_nr": 1 + "score": 0.14849103164051436, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.46822754470803873, - "sentence_nr": 1 + "score": 0.30883024781428503, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1934884374107349, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.4031456247133876, - "sentence_nr": 1 + "score": 0.40687776179631713, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.19194937906573872, - "sentence_nr": 1 + "score": 0.24344044484205296, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.5477665664300843, - "sentence_nr": 1 + "score": 0.36944883808094725, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.20669086265781264, - "sentence_nr": 1 + "score": 0.16436148154531297, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.5076721272198604, - "sentence_nr": 1 + "score": 0.3129244553794762, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.17630490037560695, - "sentence_nr": 1 + "score": 0.3545649986147617, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.48116430160978857, - "sentence_nr": 1 + "score": 0.4749542277669906, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.15611634095633747, - "sentence_nr": 1 + "score": 0.15326140415213751, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5075814499747183, - "sentence_nr": 1 + "score": 0.3238101789644524, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18815571743190213, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.4122750002638689, - "sentence_nr": 1 + "score": 0.375522612679117, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.15412719160788987, - "sentence_nr": 1 + "score": 0.1543252261021413, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5010353699512481, - "sentence_nr": 1 + "score": 0.3572407559404224, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 + "score": 0.3352430929236216, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 + "score": 0.41922948177882463, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.06647168102389285, - "sentence_nr": 1 + "score": 0.2666090188234886, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.34350832619898364, - "sentence_nr": 1 + "score": 0.4310539878732571, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 + "score": 0.4004456831424889, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 + "score": 0.5299539464991493, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.17077058518804336, - "sentence_nr": 1 + "score": 0.3398088489694245, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5022008374701596, - "sentence_nr": 1 + "score": 0.4228308786458922, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.10784756064735967, - "sentence_nr": 1 + "score": 0.2927926577346015, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.4427230465401631, - "sentence_nr": 1 + "score": 0.4165527532106081, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.06656213940646748, - "sentence_nr": 1 + "score": 0.27073362211548463, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.38435741328258305, - "sentence_nr": 1 + "score": 0.3669195863456915, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.1694466724647263, - "sentence_nr": 1 + "score": 0.10401577613691954, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.4902502031746037, - "sentence_nr": 1 + "score": 0.17463781885740615, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.175396614619324, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.49736499605529066, - "sentence_nr": 1 + "score": 0.27890809547716944, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.15154395847232716, - "sentence_nr": 1 + "score": 0.14068535649874328, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.46053919348995803, - "sentence_nr": 1 + "score": 0.21065537154817968, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.1609675245202845, - "sentence_nr": 1 + "score": 0.09207598308796072, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.5069863833094232, - "sentence_nr": 1 + "score": 0.24553846741883023, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4041678259311437, - "sentence_nr": 1 + "score": 0.21070435913784732, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.1290514243115152, - "sentence_nr": 1 + "score": 0.1163540245408256, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4766581477336301, - "sentence_nr": 1 + "score": 0.2115841789715117, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.12422788549118892, - "sentence_nr": 1 + "score": 0.218134321293328, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.40222210564426, - "sentence_nr": 1 + "score": 0.35541240647259675, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.09735981717515908, - "sentence_nr": 1 + "score": 0.19105600040048565, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.35288934658906385, - "sentence_nr": 1 + "score": 0.41505761608077835, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 + "score": 0.2064597158958983, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 + "score": 0.36557785420213534, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.13012870333257068, - "sentence_nr": 1 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3852835519852091, - "sentence_nr": 1 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3356633416447032, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.28789057461471257, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.14040851441890545, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.41423464679009114, - "sentence_nr": 1 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.10467757347424328, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.36749853206282146, - "sentence_nr": 1 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0952569581727979, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.38264808953110185, - "sentence_nr": 1 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.13026649757585426, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.41550755035304077, - "sentence_nr": 1 + "score": 0.49546288984677567, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.039782861678265974, - "sentence_nr": 1 + "score": 0.43795381992037963, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.1175904695048123, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.3996881234028031, - "sentence_nr": 1 + "score": 0.5881561248602009, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.13714845589364738, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.45499281593451946, - "sentence_nr": 1 + "score": 0.40854152133685306, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.11564012893219777, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.44599783682350064, - "sentence_nr": 1 + "score": 0.41213231348812146, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 + "score": 0.40435987083533204, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.12022286401047096, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.48279986805368713, - "sentence_nr": 1 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.15350377490367967, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.47645148444499064, - "sentence_nr": 1 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.38785209659947417, - "sentence_nr": 1 + "score": 0.39858613265631837, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.10565372462613234, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.44438099138270787, - "sentence_nr": 1 + "score": 0.5948724602646328, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.12848168928706002, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4421263683867116, - "sentence_nr": 1 + "score": 0.5042211795038526, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.18629760071299903, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4381418376415505, - "sentence_nr": 1 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.09198045184317984, - "sentence_nr": 1 + "score": 0.27447938256311044, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4598393646838097, - "sentence_nr": 1 + "score": 0.615291848344044, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.07798530247118374, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4006113700211268, - "sentence_nr": 1 + "score": 0.5516607622642397, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.14541923959059266, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.47577612932999147, - "sentence_nr": 1 + "score": 0.47160616105623426, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3026566818840519, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5945859352092411, - "sentence_nr": 1 + "score": 0.33762297226992255, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2521233582161207, - "sentence_nr": 1 + "score": 0.4576529535952892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.5309982646782259, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.2658483576665877, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.6410540990527072, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3182970443542658, - "sentence_nr": 1 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5953162569846108, - "sentence_nr": 1 + "score": 0.6374693500772332, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.18816868192268246, - "sentence_nr": 1 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5179253053631742, - "sentence_nr": 1 + "score": 0.6151179643430991, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.2354441600194623, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5020320865374484, - "sentence_nr": 1 + "score": 0.6562641136790542, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.09629060614977814, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.43565498999747165, - "sentence_nr": 1 + "score": 0.46426595961938383, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.41238100267720657, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.28418123342684043, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.539816402671069, - "sentence_nr": 1 + "score": 0.5705717737418762, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.3765959322920135, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6295826606382191, - "sentence_nr": 1 + "score": 0.410846945789476, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.40801269202545287, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6210533025653295, - "sentence_nr": 1 + "score": 0.39909989628767284, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.07793031063789554, - "sentence_nr": 1 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3700181221537743, - "sentence_nr": 1 + "score": 0.6728506998168392, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0867932999243575, - "sentence_nr": 1 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4201964133235075, - "sentence_nr": 1 + "score": 0.6758978744760765, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.08214106568089705, - "sentence_nr": 1 + "score": 0.17181529671327242, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3969463877642616, - "sentence_nr": 1 + "score": 0.5293474685884572, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.1897299381066278, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5086851537953713, - "sentence_nr": 1 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.3833939462124923, - "sentence_nr": 1 + "score": 0.4410492519530161, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.2718653389257641, - "sentence_nr": 1 + "score": 0.4429196299668147, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.42323664675852685, - "sentence_nr": 1 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.3327209336079636, - "sentence_nr": 1 + "score": 0.3844263765000694, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.1804000267306113, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.451798442226037, - "sentence_nr": 1 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.1777835117834348, - "sentence_nr": 1 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5166806073547074, - "sentence_nr": 1 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.009396473650937872, - "sentence_nr": 1 + "score": 0.6474126202050918, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.13582906387565688, - "sentence_nr": 1 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.43344913217266734, - "sentence_nr": 1 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4637878319059324, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.41649654108052436, - "sentence_nr": 1 + "score": 0.6919476196061328, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3446073377034663, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3630576975795868, - "sentence_nr": 1 + "score": 0.7621696379946562, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0744904632040495, - "sentence_nr": 1 + "score": 0.3237722713145643, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4111163205685468, - "sentence_nr": 1 + "score": 0.7426638026175545, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4363130300030932, - "sentence_nr": 1 + "score": 0.13369377363079382, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.25947507140745757, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.41747276065817185, - "sentence_nr": 1 + "score": 0.6659437947666702, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.07749370908741021, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.3853293582383978, - "sentence_nr": 1 + "score": 0.49342175914364256, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.08950426271691118, - "sentence_nr": 1 + "score": 0.18842393723950338, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.419071051019247, - "sentence_nr": 1 + "score": 0.5854975500881314, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.195647514979229, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.43622390508229153, - "sentence_nr": 1 + "score": 0.5725643788499303, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.13343258247486778, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4018842345370629, - "sentence_nr": 1 + "score": 0.4352628824108997, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22179945921983923, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4250905063113662, - "sentence_nr": 1 + "score": 0.6249971903914197, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.36347800793516216, - "sentence_nr": 1 + "score": 0.5753050684342109, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.06254678076846341, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3887428577633272, - "sentence_nr": 1 + "score": 0.5116862201536014, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.08767210132815903, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.40476518002703893, - "sentence_nr": 1 + "score": 0.33494612818381275, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.08616711094288851, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3696512763473903, - "sentence_nr": 1 + "score": 0.520472515533923, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.12894104034845807, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4486368934849452, - "sentence_nr": 1 + "score": 0.33471616336068044, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.08825252192863794, - "sentence_nr": 1 + "score": 0.28977907494497107, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4377853721520782, - "sentence_nr": 1 + "score": 0.6663117339552681, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.05345137572833361, - "sentence_nr": 1 + "score": 0.2961516536011624, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.3829169125379508, - "sentence_nr": 1 + "score": 0.7355780986981637, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.05422898988559086, - "sentence_nr": 1 + "score": 0.2865612242047131, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.335890201952113, - "sentence_nr": 1 + "score": 0.6433813179203622, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.4414911655469702, - "sentence_nr": 1 + "score": 0.4276859054768592, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.34617921188455225, - "sentence_nr": 1 + "score": 0.43620605921972144, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.0588222649477664, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.3642771871011383, - "sentence_nr": 1 + "score": 0.3598792258309727, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.10186730973904586, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.43665642120840553, - "sentence_nr": 1 + "score": 0.4813598669606701, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.08248974616169381, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.40456777770242314, - "sentence_nr": 1 + "score": 0.5343147728119615, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.10496714075880566, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4262440114275301, - "sentence_nr": 1 + "score": 0.5125809225356253, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.14738500064905094, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4659728395318289, - "sentence_nr": 1 + "score": 0.5963099883424426, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.15386029327005746, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.43911482594829104, - "sentence_nr": 1 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.10070927557742705, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.43718220262892105, - "sentence_nr": 1 + "score": 0.5539920925426138, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.11478463129234825, - "sentence_nr": 1 + "score": 0.2102369368326755, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4651957501593415, - "sentence_nr": 1 + "score": 0.5768887726639784, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.07137101582673294, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4075406301092705, - "sentence_nr": 1 + "score": 0.5326397959358325, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0643329477522681, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3960585990192623, - "sentence_nr": 1 + "score": 0.5226572946586268, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.15050382816942576, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.4714951011303657, - "sentence_nr": 1 + "score": 0.4877445613866086, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.17247941414020762, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.48320144379865687, - "sentence_nr": 1 + "score": 0.5073395824633415, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.057981164297440296, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.33896784137459673, - "sentence_nr": 1 + "score": 0.5073395824633415, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.09751270821852938, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.395617758442078, - "sentence_nr": 1 + "score": 0.4148097947848928, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.06301432444316532, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4249725532507508, - "sentence_nr": 1 + "score": 0.33709347944719925, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.18248753930464637, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4759830743101189, - "sentence_nr": 1 + "score": 0.29382595610734974, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.3370100422576744, - "sentence_nr": 1 + "score": 0.7180407770761651, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3222538601891173, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.1946966569103724, - "sentence_nr": 1 + "score": 0.7495871587703783, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0772718393063023, - "sentence_nr": 1 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4203683137304257, - "sentence_nr": 1 + "score": 0.5773664661124461, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.08291357159799752, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4009694996956877, - "sentence_nr": 1 + "score": 0.3437729074300146, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3714280466838255, - "sentence_nr": 1 + "score": 0.30577290788405437, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3538602132402044, - "sentence_nr": 1 + "score": 0.3053963874050995, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.09026606980896171, - "sentence_nr": 1 + "score": 0.14962848372546667, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.46031801298163716, - "sentence_nr": 1 + "score": 0.5531110803538978, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.167672929900467, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.46910779766306765, - "sentence_nr": 1 + "score": 0.4660343508894544, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.32123020755377657, - "sentence_nr": 1 + "score": 0.43340932146378, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20038908500140973, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.47435308668900444, - "sentence_nr": 1 + "score": 0.6177327642561014, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.08351211898903935, - "sentence_nr": 1 + "score": 0.21258844131063828, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.33030812447506436, - "sentence_nr": 1 + "score": 0.6314891370223008, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.07528927678469202, - "sentence_nr": 1 + "score": 0.15821285888349254, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.422513417362817, - "sentence_nr": 1 + "score": 0.6605676082065987, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.09084091756463074, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4286741659142759, - "sentence_nr": 1 + "score": 0.5873831965245108, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.06126604215610123, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3837677428398438, - "sentence_nr": 1 + "score": 0.5700887051433648, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0756907193511249, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4138725093679467, - "sentence_nr": 1 + "score": 0.5873831965245108, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.08866637424249016, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.44876462229383973, - "sentence_nr": 1 + "score": 0.4133673303529474, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.10574428430204418, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.45371814600333005, - "sentence_nr": 1 + "score": 0.48231853956144055, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3980589439671235, - "sentence_nr": 1 + "score": 0.37314692804855976, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.15124503767921774, - "sentence_nr": 1 + "score": 0.17694975149532557, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4874495869756225, - "sentence_nr": 1 + "score": 0.4902785344040517, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.12649672885841734, - "sentence_nr": 1 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.41329609863930566, - "sentence_nr": 1 + "score": 0.5196343731603573, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.07465265387221826, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4274000630396105, - "sentence_nr": 1 + "score": 0.4254686256509745, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.1219449069656942, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.4776943038671049, - "sentence_nr": 1 + "score": 0.20793313992045814, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.4606409590817001, - "sentence_nr": 1 + "score": 0.4271693186358773, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.07996209785853586, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.4279137012019699, - "sentence_nr": 1 + "score": 0.44583799328544693, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.37030468338190614, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.16764957347186446, - "sentence_nr": 1 + "score": 0.7587397825317436, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.1195053737774238, - "sentence_nr": 1 + "score": 0.29420957081163707, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4512136289975786, - "sentence_nr": 1 + "score": 0.735955064899578, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.21748353646757182, - "sentence_nr": 1 + "score": 0.2722589423069702, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4462746462826943, - "sentence_nr": 1 + "score": 0.7290399536251687, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.24443999371485628, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4991016392840656, - "sentence_nr": 1 + "score": 0.40933226567881303, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.34155562837143877, - "sentence_nr": 1 + "score": 0.5099813007320333, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.1139393935967296, - "sentence_nr": 1 + "score": 0.46778058365701697, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.15066783649768578, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.4483285849553733, - "sentence_nr": 1 + "score": 0.1433583753123658, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.1701935252826955, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.4455315745640286, - "sentence_nr": 1 + "score": 0.49048115595910957, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.11941817189528041, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.4275071634813637, - "sentence_nr": 1 + "score": 0.36660871058936323, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.08932983819566953, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.412238728569517, - "sentence_nr": 1 + "score": 0.4698447771642698, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.35698569920901285, - "sentence_nr": 1 + "score": 0.2891112498777974, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.1275291133503835, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.4127884601900206, - "sentence_nr": 1 + "score": 0.20506702518574138, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.4331286519146886, - "sentence_nr": 1 + "score": 0.15084092981447839, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.3538966478758119, - "sentence_nr": 1 + "score": 0.44419236241196947, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.4179644538349004, - "sentence_nr": 1 + "score": 0.43706010794795863, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.11116091368823534, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.43307401079748475, - "sentence_nr": 1 + "score": 0.40969772112178865, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.07649978886725356, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.41031664319131844, - "sentence_nr": 1 + "score": 0.4134635647455475, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.2626949949898101, - "sentence_nr": 1 + "score": 0.3934230844821369, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.15077540572671325, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.43064021519574214, - "sentence_nr": 1 + "score": 0.3289300072190674, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.3795844422349344, - "sentence_nr": 1 + "score": 0.32413768919026276, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.3964061846611735, - "sentence_nr": 1 + "score": 0.34057065677205645, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.07012053105310272, - "sentence_nr": 1 + "score": 0.21142141714303078, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.3123716745719453, - "sentence_nr": 1 + "score": 0.43493490557877573, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.3856352748003268, - "sentence_nr": 1 + "score": 0.28991415068332943, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0876062628502436, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.3978552283854932, - "sentence_nr": 1 + "score": 0.2535985303909064, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.11378204941109882, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.4981472095171313, - "sentence_nr": 1 + "score": 0.41597114236951854, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.43759229210123524, - "sentence_nr": 1 + "score": 0.4350989271447826, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.10505106462290037, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.4474870048911137, - "sentence_nr": 1 + "score": 0.4297476286175239, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.11534976570369744, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.46761329904761845, - "sentence_nr": 1 + "score": 0.6390929517394389, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0487561532099542, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.3938156291645021, - "sentence_nr": 1 + "score": 0.4714767063337979, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.4098374118843212, - "sentence_nr": 1 + "score": 0.5124045843781208, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.09697921503203778, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.47076286112111615, - "sentence_nr": 1 + "score": 0.4506325776546161, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.11335203496873462, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.46528080200591054, - "sentence_nr": 1 + "score": 0.4754711104013267, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4775280122429458, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.108829546976023, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.4177339268402449, - "sentence_nr": 1 + "score": 0.18900427606312895, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.008776218574747889, - "sentence_nr": 1 + "score": 0.2379375267482382, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.09026606980896171, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.4602880143145438, - "sentence_nr": 1 + "score": 0.1935812904783315, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.1059352062327485, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.4291550754056065, - "sentence_nr": 1 + "score": 0.5406340703314851, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2102369368326755, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.4239838444198129, - "sentence_nr": 1 + "score": 0.5597860306970691, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.0009218289085545725, - "sentence_nr": 1 + "score": 0.2853612459193062, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.14883746844067872, - "sentence_nr": 1 + "score": 0.18842393723950338, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.4476843235219058, - "sentence_nr": 1 + "score": 0.5854975500881314, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.07999819990926477, - "sentence_nr": 1 + "score": 0.15774545980684188, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.3889987132692464, - "sentence_nr": 1 + "score": 0.616730419953906, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.3916082207331212, - "sentence_nr": 1 + "score": 0.44478883235565975, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.45439447866906496, - "sentence_nr": 1 + "score": 0.1400373960430748, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.09171389226334559, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.47356776940569145, - "sentence_nr": 1 + "score": 0.37309390213978083, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.16136315230667173, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.48202529715173736, - "sentence_nr": 1 + "score": 0.3718896131479321, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17534823156623092, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4750212573397775, - "sentence_nr": 1 + "score": 0.4853402799234523, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.003607064963668313, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.09577479457615844, - "sentence_nr": 1 + "score": 0.32889384774917263, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.11161133657801552, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4277891734340718, - "sentence_nr": 1 + "score": 0.2497159712696397, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.15720527174368754, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.47882285385622714, - "sentence_nr": 1 + "score": 0.5335315304967084, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.3374178992279451, - "sentence_nr": 1 + "score": 0.42970160394394363, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.15653859793617866, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.43177798053127925, - "sentence_nr": 1 + "score": 0.4370528005274534, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.3848892678578171, - "sentence_nr": 1 + "score": 0.12716724199879337, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.059281546387121374, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.31614571419525433, - "sentence_nr": 1 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", + "metric": "chrf", + "score": 0.5690206807396397, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.2973352934874205, - "sentence_nr": 1 + "score": 0.5038920264146319, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.15510829053669334, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.4231895807656464, - "sentence_nr": 1 + "score": 0.39818525322365445, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.27560832232663307, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5392779492225674, - "sentence_nr": 1 + "score": 0.554411787673542, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.09545026362079756, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.2895757560105421, - "sentence_nr": 1 + "score": 0.4146222850152668, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.42348732385732035, - "sentence_nr": 1 + "score": 0.4194210013201768, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.08302169728235531, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.3447730755591614, - "sentence_nr": 1 + "score": 0.5277042186210706, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.12785320519680665, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.46613459917836336, - "sentence_nr": 1 + "score": 0.3827580433841417, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.0982484177591637, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.4109236039282987, - "sentence_nr": 1 + "score": 0.15388831190795366, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.10106439835419144, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.44450926478634867, - "sentence_nr": 1 + "score": 0.24316286154385877, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.0891537192318598, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.3970634926176537, - "sentence_nr": 1 + "score": 0.28130650893311265, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.07992844954996121, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.4400081800535333, - "sentence_nr": 1 + "score": 0.33867507760059357, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.09554681544059333, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.41422485590617925, - "sentence_nr": 1 + "score": 0.2996031251762614, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.32015903488199987, - "sentence_nr": 1 + "score": 0.2065195596274271, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.06169438305755944, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.3642077907264287, - "sentence_nr": 1 + "score": 0.13995623895459872, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.14483568709851755, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.45354858647333196, - "sentence_nr": 1 + "score": 0.4674855890757815, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.051823166648868844, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.3292846108177459, - "sentence_nr": 1 + "score": 0.47300840366486596, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.10444675051855158, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.38166524283468484, - "sentence_nr": 1 + "score": 0.6402657401259225, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.021139000776129766, - "sentence_nr": 1 + "score": 0.6478447458847402, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.11663764605404517, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.41241788679161784, - "sentence_nr": 1 + "score": 0.5695248405921098, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.16496711525651045, - "sentence_nr": 1 + "score": 0.1329604040267493, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.052359103292999656, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.3805982553288677, - "sentence_nr": 1 + "score": 0.4471856677359072, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0950136506275681, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4372017487229785, - "sentence_nr": 1 + "score": 0.452279977058944, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0946260953698702, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.4353772493110627, - "sentence_nr": 1 + "score": 0.49816993286090683, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.15714515459910894, - "sentence_nr": 1 + "score": 0.44863216660495664, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.11183036824736405, - "sentence_nr": 1 + "score": 0.3302903244452369, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.1651856335196525, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.4631382298313573, - "sentence_nr": 1 + "score": 0.1342477978716863, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.16543976568828428, - "sentence_nr": 1 + "score": 0.42511022061090775, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.07939562512368398, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.34250389101564743, - "sentence_nr": 1 + "score": 0.4279977800006272, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.07545713066088315, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.35068174137403757, - "sentence_nr": 1 + "score": 0.17013461044703918, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.1113696974855524, - "sentence_nr": 1 + "score": 0.19400141696774292, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.07652593079250605, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.3602429629880003, - "sentence_nr": 1 + "score": 0.1721293079939147, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.12506460115047335, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.46140175133635725, - "sentence_nr": 1 + "score": 0.4411812923020589, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.4404222773455128, - "sentence_nr": 1 + "score": 0.4179911123724079, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.1259356760989446, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.44568274520971096, - "sentence_nr": 1 + "score": 0.3630314170535937, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.1643146814613677, - "sentence_nr": 1 + "score": 0.22637359354764466, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.5127730105039489, - "sentence_nr": 1 + "score": 0.6010070471156334, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.41972158348095406, - "sentence_nr": 1 + "score": 0.5880855470290005, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.3866869165486058, - "sentence_nr": 1 + "score": 0.5535439540882026, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.10700354504676883, - "sentence_nr": 1 + "score": 0.22894156860669912, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.48486759891570147, - "sentence_nr": 1 + "score": 0.6299487983245466, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15362208233245514, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.32485028075459577, - "sentence_nr": 1 + "score": 0.6295157857600502, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.13544894983916997, - "sentence_nr": 1 + "score": 0.18842393723950338, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.4663180024816666, - "sentence_nr": 1 + "score": 0.5854975500881314, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.1289863677885349, - "sentence_nr": 1 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.42110704132809784, - "sentence_nr": 1 + "score": 0.6728506998168392, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.09354237835233341, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.4409559217991841, - "sentence_nr": 1 + "score": 0.6200828204097578, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.12748506711468208, - "sentence_nr": 1 + "score": 0.2722589423069702, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.4508210683728834, - "sentence_nr": 1 + "score": 0.658571547163188, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.10127171102984855, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.4525620764847558, - "sentence_nr": 1 + "score": 0.5800922255460801, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.10833971870416897, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.4467303749319595, - "sentence_nr": 1 + "score": 0.5766882097318834, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.16322494183480127, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.4815584993817062, - "sentence_nr": 1 + "score": 0.5800922255460801, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.08894652425495941, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.444906007048383, - "sentence_nr": 1 + "score": 0.4666156174173635, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.1029835796838552, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4404518759673606, - "sentence_nr": 1 + "score": 0.3893867836646916, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.2172505600894225, - "sentence_nr": 1 + "score": 0.38246468665452293, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.4404811365579724, - "sentence_nr": 1 + "score": 0.21732734812103588, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.12826630655689159, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.36561922835086, - "sentence_nr": 1 + "score": 0.2673895048733062, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.014482940348354725, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.1504461219252398, - "sentence_nr": 1 + "score": 0.28158744196562724, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.11993418633737256, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.4771538581125459, - "sentence_nr": 1 + "score": 0.13678452669387658, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.049266699072917926, - "sentence_nr": 1 + "score": 0.6382466300772751, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.10041064691273172, - "sentence_nr": 1 + "score": 0.1075467277016126, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.44193101759002734, - "sentence_nr": 1 + "score": 0.5012312009859288, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.14163299203710986, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.3958314877752854, - "sentence_nr": 1 + "score": 0.44355652237335036, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.09463828889338871, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.3398200805270262, - "sentence_nr": 1 + "score": 0.40736387061175394, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.0904087252785689, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.41830513174690515, - "sentence_nr": 1 + "score": 0.37489485923390314, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.18237761178381828, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.4897620961756989, - "sentence_nr": 1 + "score": 0.21421851674109063, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.13160881951665948, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.47196475148373473, - "sentence_nr": 1 + "score": 0.15753286601971267, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.2854365802731815, - "sentence_nr": 1 + "score": 0.21413630439620454, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.1760489367713912, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.45847709984838, - "sentence_nr": 1 + "score": 0.30354067465892703, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.3339687893627504, - "sentence_nr": 1 + "score": 0.4283147867664682, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.15216414216709395, - "sentence_nr": 1 + "score": 0.28583707879882797, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.12408616318856698, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.3876257744772486, - "sentence_nr": 1 + "score": 0.4802701406922108, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.13661459309404012, - "sentence_nr": 1 + "score": 0.490032576569998, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.07105699030509427, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.40480969933909144, - "sentence_nr": 1 + "score": 0.47018722626716275, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.1200100437012302, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.4636227306109079, - "sentence_nr": 1 + "score": 0.2460260310809598, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.17208141302168437, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.4542522451167506, - "sentence_nr": 1 + "score": 0.17283382641366998, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.21351902664706998, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5130443042033361, - "sentence_nr": 1 + "score": 0.20022065596322774, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.10553179283083523, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.4283144779172244, - "sentence_nr": 1 + "score": 0.14697628025481496, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.10814706353513916, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.4216597036907072, - "sentence_nr": 1 + "score": 0.24657376730321656, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.38105951101537255, - "sentence_nr": 1 + "score": 0.291098552209934, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.10102972193860417, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4444474600259224, - "sentence_nr": 1 + "score": 0.4263413781248326, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.1785750235950628, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.43344212044851604, - "sentence_nr": 1 + "score": 0.4359367610984378, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3562570068438905, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.16508680260754793, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.4714504120878508, - "sentence_nr": 1 + "score": 0.11383643766535269, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.07862112679032317, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.4059503829406287, - "sentence_nr": 1 + "score": 0.16579761015459532, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.14400312819313033, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.43256046228969486, - "sentence_nr": 1 + "score": 0.191825135463227, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.06289570792563275, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3813881170279124, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2887138086538547, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.38763756150559275, - "sentence_nr": 1 + "score": 0.6342291345998248, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.16269986423611488, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.11960636789197196, - "sentence_nr": 1 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4498565343058379, - "sentence_nr": 1 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.06530397960697328, - "sentence_nr": 1 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.36899545840843095, - "sentence_nr": 1 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.05364480688581678, - "sentence_nr": 1 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3678203159539189, - "sentence_nr": 1 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.10060184892331835, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.43940648106882807, - "sentence_nr": 1 + "score": 0.2828367156737383, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.09042147098974282, - "sentence_nr": 1 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.45055351363359086, - "sentence_nr": 1 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.06758583657764057, - "sentence_nr": 1 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.40657395859582235, - "sentence_nr": 1 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.15670253601070666, - "sentence_nr": 1 + "score": 0.44897710722021167, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.4663740399508032, - "sentence_nr": 1 + "score": 0.6862249089515978, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.08269576405332207, - "sentence_nr": 1 + "score": 0.404727200247809, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.38185156188383546, - "sentence_nr": 1 + "score": 0.6681898017773897, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.05014252780397407, - "sentence_nr": 1 + "score": 0.40276720463657734, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.38032629067357443, - "sentence_nr": 1 + "score": 0.6529271690805427, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.19045679700622437, - "sentence_nr": 1 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.4124342444810736, - "sentence_nr": 1 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5379348324975908, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.2398247112527542, - "sentence_nr": 1 + "score": 0.7703766110349561, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.30188353873287377, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.06939838145153245, - "sentence_nr": 1 + "score": 0.6086565367747951, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.1384529882948561, - "sentence_nr": 1 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.38277790453523536, - "sentence_nr": 1 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.1162066330922535, - "sentence_nr": 1 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.3781611496143332, - "sentence_nr": 1 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.07454232971572508, - "sentence_nr": 1 + "score": 0.6026286934891149, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.32060982856396664, - "sentence_nr": 1 + "score": 0.8025775976044891, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.42583748090002016, - "sentence_nr": 1 + "score": 0.713787745993602, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5896613549548209, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.24120995733605022, - "sentence_nr": 1 + "score": 0.7528914749586836, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.10825804306599494, - "sentence_nr": 1 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.3972611038417328, - "sentence_nr": 1 + "score": 0.713787745993602, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.15969495416406884, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4493675427485572, - "sentence_nr": 1 + "score": 0.24706467963183681, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4801289744823913, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.21229683306385236, - "sentence_nr": 1 + "score": 0.6766690087429765, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.10500492468363652, - "sentence_nr": 1 + "score": 0.3272712268138726, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.35306452262726606, - "sentence_nr": 1 + "score": 0.6272846474183881, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.05275923024775565, - "sentence_nr": 1 + "score": 0.3665134361137304, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.3724723203846839, - "sentence_nr": 1 + "score": 0.6118771029352303, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.28489318277723963, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.3544628606759813, - "sentence_nr": 1 + "score": 0.5764325110247531, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2981792160679168, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.3371547585108182, - "sentence_nr": 1 + "score": 0.5788026000794341, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.347335662744532, - "sentence_nr": 1 + "score": 0.2799331151961311, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6471892368478446, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.3436708646772823, - "sentence_nr": 1 + "score": 0.8142499721936278, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.05182797087573874, - "sentence_nr": 1 + "score": 0.7012294787544179, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.3356084649197975, - "sentence_nr": 1 + "score": 0.8478115719875968, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.0709399674988252, - "sentence_nr": 1 + "score": 0.23114663823833642, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4066552319349635, - "sentence_nr": 1 + "score": 0.5786592584609213, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.08964131615841985, - "sentence_nr": 1 + "score": 0.23713320246552005, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3962711438859162, - "sentence_nr": 1 + "score": 0.6106842970161642, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.05980107027395768, - "sentence_nr": 1 + "score": 0.21690365808279138, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3921687449014443, - "sentence_nr": 1 + "score": 0.5384773678665918, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0842991091574967, - "sentence_nr": 1 + "score": 0.3060368950930089, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.37839472970450666, - "sentence_nr": 1 + "score": 0.6736142284622013, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6888365053466561, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.021188116207401797, - "sentence_nr": 1 + "score": 0.8656273480576243, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.059737095980317775, - "sentence_nr": 1 + "score": 0.25711386542134795, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.33742048813623593, - "sentence_nr": 1 + "score": 0.6088853751738869, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.12650809806003369, - "sentence_nr": 1 + "score": 0.45307778036928104, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4579202271851988, - "sentence_nr": 1 + "score": 0.6935397252637394, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.09676230489828269, - "sentence_nr": 1 + "score": 0.5069487414732323, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.43266369498706486, - "sentence_nr": 1 + "score": 0.7801245319017357, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.1691386174483793, - "sentence_nr": 1 + "score": 0.5695988432761473, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4920789340026317, - "sentence_nr": 1 + "score": 0.7516103467926585, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.26337200877742073, - "sentence_nr": 1 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5332559901749826, - "sentence_nr": 1 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.12820355595850366, - "sentence_nr": 1 + "score": 0.7196315267102845, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.3621325951848801, - "sentence_nr": 1 + "score": 0.8835331636515565, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.27031481031630283, - "sentence_nr": 1 + "score": 0.7361065921505279, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.19217857276238626, - "sentence_nr": 1 + "score": 0.30752616970214336, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.49145817895698257, - "sentence_nr": 1 + "score": 0.5976254557718147, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.1348878985611687, - "sentence_nr": 1 + "score": 0.40157733283424196, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4519788039621858, - "sentence_nr": 1 + "score": 0.6532350818978572, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.05350911980603496, - "sentence_nr": 1 + "score": 0.38091370416670794, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3720359854835493, - "sentence_nr": 1 + "score": 0.6438225861756911, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.11248337299167142, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.47408452973613896, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.03735667108797313, - "sentence_nr": 1 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.15907551162629324, - "sentence_nr": 1 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4524235916096891, - "sentence_nr": 1 + "score": 0.7202697992734389, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.19135523280427486, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.49947805136320467, - "sentence_nr": 1 + "score": 0.24363783193706642, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2356661678654945, - "sentence_nr": 1 + "score": 0.3903594390682207, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5124350706386419, - "sentence_nr": 1 + "score": 0.6662116837137958, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.14944432524273302, - "sentence_nr": 1 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4972796478830659, - "sentence_nr": 1 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2128223810599462, - "sentence_nr": 1 + "score": 0.6349495142258627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5048999729332083, - "sentence_nr": 1 + "score": 0.7749613594649343, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2159761642923436, - "sentence_nr": 1 + "score": 0.4719458927872361, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5016920563570307, - "sentence_nr": 1 + "score": 0.6863265729154345, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.20689377284100188, - "sentence_nr": 1 + "score": 0.5309354663044072, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.499181874773421, - "sentence_nr": 1 + "score": 0.6990707992725005, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.15954322482017244, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4866743072979551, - "sentence_nr": 1 + "score": 0.31008822704072875, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4522625000672462, - "sentence_nr": 1 + "score": 0.31008822704072875, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.005506479027884721, - "sentence_nr": 1 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.12804576923274616, - "sentence_nr": 1 + "score": 0.4506667273103674, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.231679638185573, - "sentence_nr": 1 + "score": 0.224188058954654, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5128772172047342, - "sentence_nr": 1 + "score": 0.5978847447208526, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2128223810599462, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5048999729332083, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.11469202308276233, - "sentence_nr": 1 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.47736356509677796, - "sentence_nr": 1 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.10588612806056373, - "sentence_nr": 1 + "score": 0.19920494035049138, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4068718481729766, - "sentence_nr": 1 + "score": 0.614209720001149, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.10720391954020723, - "sentence_nr": 1 + "score": 0.7221847203387323, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.37219605281253065, - "sentence_nr": 1 + "score": 0.8931067231936596, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.09793316925795417, - "sentence_nr": 1 + "score": 0.3416581331218724, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4297577431879659, - "sentence_nr": 1 + "score": 0.6578570934289981, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.10461993210426317, - "sentence_nr": 1 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.411539574168363, - "sentence_nr": 1 + "score": 0.7644556249154987, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.09232875412879928, - "sentence_nr": 1 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.40574729737089493, - "sentence_nr": 1 + "score": 0.7342525133793019, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.054115495307563, - "sentence_nr": 1 + "score": 0.4797543511401896, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.3261514049985403, - "sentence_nr": 1 + "score": 0.7240781310560407, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.13894512516215204, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.45252610009573474, - "sentence_nr": 1 + "score": 0.14107526427034148, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.06986280403265237, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.35513587606181224, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.6401876410870359, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.7526484951226097, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.11511385959745848, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.43904137765866535, - "sentence_nr": 1 + "score": 0.24914989711092594, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.33057129676705455, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.24802563498261762, - "sentence_nr": 1 + "score": 0.5669225664686625, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.15017237887090715, - "sentence_nr": 1 + "score": 0.33057129676705455, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4481403580546466, - "sentence_nr": 1 + "score": 0.5669225664686625, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0967458811247473, - "sentence_nr": 1 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4485783191522753, - "sentence_nr": 1 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.06851723496815999, - "sentence_nr": 1 + "score": 0.5468017145144113, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.40911149660575097, - "sentence_nr": 1 + "score": 0.7519227909172003, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.14184998906630783, - "sentence_nr": 1 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.44498159653494584, - "sentence_nr": 1 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.098684745093626, - "sentence_nr": 1 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.4007829842063641, - "sentence_nr": 1 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.12459539355692184, - "sentence_nr": 1 + "score": 0.617939643800199, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.4660673682008178, - "sentence_nr": 1 + "score": 0.8356543644789964, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.05089649055811939, - "sentence_nr": 1 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.332316583093035, - "sentence_nr": 1 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4226166554364405, - "sentence_nr": 1 + "score": 0.30042054271881197, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.27720938018510377, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4257191895245898, - "sentence_nr": 1 + "score": 0.428047180290638, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.08255714494862634, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4304002627139641, - "sentence_nr": 1 + "score": 0.30350690419450826, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4447505575009147, - "sentence_nr": 1 + "score": 0.22847893469128855, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6281881652405527, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.34130342683703757, - "sentence_nr": 1 + "score": 0.7361567090943679, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.09309060319054646, - "sentence_nr": 1 + "score": 0.569133886912883, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4298405489249517, - "sentence_nr": 1 + "score": 0.6834516951654327, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.13308739447486365, - "sentence_nr": 1 + "score": 0.18951629567590744, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3932447622969156, - "sentence_nr": 1 + "score": 0.5515559648122452, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.27249745234058675, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.36741937011390374, - "sentence_nr": 1 + "score": 0.576487806400357, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.11220450894323894, - "sentence_nr": 1 + "score": 0.32078739729528816, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.46129962837218175, - "sentence_nr": 1 + "score": 0.5779838399768712, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.1495364895327543, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.47363863746713725, - "sentence_nr": 1 + "score": 0.25811803218589047, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.09996446612314541, - "sentence_nr": 1 + "score": 0.4814564802258215, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.38170822021413087, - "sentence_nr": 1 + "score": 0.7954823723658209, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.04590350535783419, - "sentence_nr": 1 + "score": 0.3742893656007335, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.3590907949514895, - "sentence_nr": 1 + "score": 0.7582803042224814, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.07468620780110702, - "sentence_nr": 1 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.3836013751816852, - "sentence_nr": 1 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.10539949748136965, - "sentence_nr": 1 + "score": 0.602867050301643, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.43372525820935726, - "sentence_nr": 1 + "score": 0.8176176657543648, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.5386695403411698, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.08313078148769443, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.37751928853187794, - "sentence_nr": 1 + "score": 0.17601203382268035, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0017825311942959, - "sentence_nr": 1 + "score": 0.671938683171001, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.060825626903085836, - "sentence_nr": 1 + "score": 0.5206571060403834, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.367772007695087, - "sentence_nr": 1 + "score": 0.689324258927, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4174441728660793, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3645369664653625, - "sentence_nr": 1 + "score": 0.6692136096184196, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3275292968031138, - "sentence_nr": 1 + "score": 0.636016958488394, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.08932983819566953, - "sentence_nr": 1 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.37462132890676997, - "sentence_nr": 1 + "score": 0.636016958488394, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.12151683896637884, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.3874631848880938, - "sentence_nr": 1 + "score": 0.4794224895461657, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.10204941450542204, - "sentence_nr": 1 + "score": 0.17150296156301634, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.3642762802151916, - "sentence_nr": 1 + "score": 0.48812954881732445, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.2954316287998063, - "sentence_nr": 1 + "score": 0.46076979395163187, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.38351345508808277, - "sentence_nr": 1 + "score": 0.29623686353922923, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.08121271060180286, - "sentence_nr": 1 + "score": 0.41682189465797687, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.38347124715279823, - "sentence_nr": 1 + "score": 0.6573099561830166, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.2852636439147137, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5851048071392815, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.37494051432044967, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.346134163535414, - "sentence_nr": 1 + "score": 0.661973437204244, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.25122591039975606, - "sentence_nr": 1 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.142144689462689, - "sentence_nr": 1 + "score": 0.37494051432044967, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4331731101712559, - "sentence_nr": 1 + "score": 0.661973437204244, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.15453746478246141, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4413516563123831, - "sentence_nr": 1 + "score": 0.30407761511253945, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.1383193561213217, - "sentence_nr": 1 + "score": 0.5461499540157965, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4229717720106369, - "sentence_nr": 1 + "score": 0.7954823723658209, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.14846392828893068, - "sentence_nr": 1 + "score": 0.14628563604185, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.44939103256256696, - "sentence_nr": 1 + "score": 0.4777301300307737, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.12146424147064877, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.4236242053572171, - "sentence_nr": 1 + "score": 0.14221479650735855, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.1094074965643601, - "sentence_nr": 1 + "score": 0.31177258041697303, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.40564547968508147, - "sentence_nr": 1 + "score": 0.4558951086991579, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.26035572673286655, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.21262860902391906, - "sentence_nr": 1 + "score": 0.40109985662775005, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.25472503432861054, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.49230982416428504, - "sentence_nr": 1 + "score": 0.4912131536580228, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.13364464646895982, - "sentence_nr": 1 + "score": 0.19018868394774802, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.420485716773103, - "sentence_nr": 1 + "score": 0.5224363928471276, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.014058355159408403, - "sentence_nr": 1 + "score": 0.2989381657659374, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.10642944544652122, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.4272539643561774, - "sentence_nr": 1 + "score": 0.3052690053887312, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.11665236403515139, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.10713148568717314, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.41522111700393083, - "sentence_nr": 1 + "score": 0.19075975291258387, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.3539070801331386, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.11340129142744679, - "sentence_nr": 1 + "score": 0.46866709139162926, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.4168800407013454, - "sentence_nr": 1 + "score": 0.7535887063318502, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.07438681343481453, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.3894532190798538, - "sentence_nr": 1 + "score": 0.20731650338051813, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.12289012856297825, - "sentence_nr": 1 + "score": 0.30094298890378757, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.4301477375362509, - "sentence_nr": 1 + "score": 0.5463695830483137, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4529852871970908, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.1585163492096374, - "sentence_nr": 1 + "score": 0.6379815839992429, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4719458927872361, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.43788019223348373, - "sentence_nr": 1 + "score": 0.6884790828920573, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.11765941642483725, - "sentence_nr": 1 + "score": 0.6912804407652906, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.44984502263523063, - "sentence_nr": 1 + "score": 0.8449079689944796, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.12108876184031253, - "sentence_nr": 1 + "score": 0.47229389414007084, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.3771925448240792, - "sentence_nr": 1 + "score": 0.7400562860667964, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5401725898595141, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3814511364616612, - "sentence_nr": 1 + "score": 0.717128056256897, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5401725898595141, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.2862195367689212, - "sentence_nr": 1 + "score": 0.7219273458493682, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.08824413655138029, - "sentence_nr": 1 + "score": 0.44353395455270217, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3977541835364748, - "sentence_nr": 1 + "score": 0.6913921626327173, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15821285888349254, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.3702987017023586, - "sentence_nr": 1 + "score": 0.4716642229159947, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39537383933343595, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.3241317524160092, - "sentence_nr": 1 + "score": 0.617311647158499, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.07351652222518425, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.3862617013651048, - "sentence_nr": 1 + "score": 0.4393606972268638, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.38224927613981324, - "sentence_nr": 1 + "score": 0.19653306323688033, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42643704825557327, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.37754295227618245, - "sentence_nr": 1 + "score": 0.7385191646867102, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.17228284869075539, - "sentence_nr": 1 + "score": 0.6511785024442115, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.10455435536860881, - "sentence_nr": 1 + "score": 0.31011575752288345, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.41817390114586295, - "sentence_nr": 1 + "score": 0.6452682411767686, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5021277621795815, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.2594621783720232, - "sentence_nr": 1 + "score": 0.761461458169805, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.32393211943598493, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.1974952222966699, - "sentence_nr": 1 + "score": 0.6474115867020543, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1998573974138024, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.37756676543137707, - "sentence_nr": 1 + "score": 0.48166604565689325, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.19258818005939538, - "sentence_nr": 1 + "score": 0.5155781222766946, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.14757581190431865, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.3162255423673242, - "sentence_nr": 1 + "score": 0.4573311375774372, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.30718853768673293, - "sentence_nr": 1 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.3499024158832446, - "sentence_nr": 1 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.12587301409115934, - "sentence_nr": 1 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.43278573034203477, - "sentence_nr": 1 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.08072859763900794, - "sentence_nr": 1 + "score": 0.30643882011101126, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.3683463348059566, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.16140904075314855, - "sentence_nr": 1 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.09640029388493841, - "sentence_nr": 1 + "score": 0.480771131185851, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.44383188407096436, - "sentence_nr": 1 + "score": 0.7032048786770096, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.09092206673648158, - "sentence_nr": 1 + "score": 0.6358974376699329, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4200146131210127, - "sentence_nr": 1 + "score": 0.736661937085844, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17059573701616795, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.27627389434334787, - "sentence_nr": 1 + "score": 0.4753746252238087, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18107197870881736, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.3701108638788564, - "sentence_nr": 1 + "score": 0.514661439036253, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1813423031516851, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.10823255315380634, - "sentence_nr": 1 + "score": 0.4972101263590737, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18107197870881736, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.10849792605943348, - "sentence_nr": 1 + "score": 0.514661439036253, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19835441454182887, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.4312123024580457, - "sentence_nr": 1 + "score": 0.6062730082124886, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.24946780875926136, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.06500924965575555, - "sentence_nr": 1 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.389301118498321, - "sentence_nr": 1 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.42962123952100073, - "sentence_nr": 1 + "score": 0.2797830107070484, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.08380035569969414, - "sentence_nr": 1 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.41608423823971435, - "sentence_nr": 1 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4727805712999679, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.3682635438782073, - "sentence_nr": 1 + "score": 0.7717158158167359, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.1643756453595719, - "sentence_nr": 1 + "score": 0.39670882908365773, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.5131518108984869, - "sentence_nr": 1 + "score": 0.5348878791728369, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.27447938256311044, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.41902495095742714, - "sentence_nr": 1 + "score": 0.5315032895817616, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39670882908365773, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.3645487079754606, - "sentence_nr": 1 + "score": 0.5409379877245147, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1906936342773436, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.3500022174766425, - "sentence_nr": 1 + "score": 0.5160021246888273, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2534743707366162, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.3013590931650816, - "sentence_nr": 1 + "score": 0.6254912096804822, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.11220450894323894, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.4103718597593798, - "sentence_nr": 1 + "score": 0.36291227725384023, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.33713540983351536, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.29873361351172023, - "sentence_nr": 1 + "score": 0.6250009083207365, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.08186981924084771, - "sentence_nr": 1 + "score": 0.5186653964016543, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.36422083962860535, - "sentence_nr": 1 + "score": 0.6561896817871797, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.07752927781917028, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.3238609427019678, - "sentence_nr": 1 + "score": 0.40562163465277223, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.10183633383410681, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.3805172880929802, - "sentence_nr": 1 + "score": 0.2922968824016215, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.2704106810852134, - "sentence_nr": 1 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.05255579792886986, - "sentence_nr": 1 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.33573064840973227, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.4115265273644293, - "sentence_nr": 1 + "score": 0.7081054397334158, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, + "score": 0.5263595737059831, + "sentence_nr": 5 + }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.33709419207258606, - "sentence_nr": 1 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.33573064840973227, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.12459211652309463, - "sentence_nr": 1 + "score": 0.7081054397334158, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.07774648652101643, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.38100957871754465, - "sentence_nr": 1 + "score": 0.1375101316530452, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5540102467708582, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8012607361988002, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.07860263587184375, - "sentence_nr": 1 + "score": 0.1897992267368494, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.37524253175141375, - "sentence_nr": 1 + "score": 0.4726855583591889, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.3092395616495983, - "sentence_nr": 1 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0811151580341062, - "sentence_nr": 1 + "score": 0.7482524153102477, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.4179228886149028, - "sentence_nr": 1 + "score": 0.8447038922744422, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.04318453178079916, - "sentence_nr": 1 + "score": 0.5021277621795815, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.3381884955798567, - "sentence_nr": 1 + "score": 0.6665605281744408, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.18025686784380132, - "sentence_nr": 1 + "score": 0.1821163528973126, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1983544145418289, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.25002690670423616, - "sentence_nr": 1 + "score": 0.4195908478809098, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.015512263616295723, - "sentence_nr": 1 + "score": 0.27970267298955453, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.43293305745010263, - "sentence_nr": 1 + "score": 0.32937303862037204, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.32529077705920345, - "sentence_nr": 1 + "score": 0.3124684968073947, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.29948848396607075, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4341999352730602, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.3897147415993498, - "sentence_nr": 1 + "score": 0.6745907228091957, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1986589078880532, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5285168275193599, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20110004903792847, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.2801941853357009, - "sentence_nr": 1 + "score": 0.45623478126637707, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.15333726274185422, - "sentence_nr": 1 + "score": 0.1987777011513927, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.36857838224116973, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.14176967102285878, - "sentence_nr": 1 + "score": 0.6856616009150279, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3384653583738009, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.15856726741880453, - "sentence_nr": 1 + "score": 0.6082869404281873, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.47375069012411286, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.2830740020655188, - "sentence_nr": 1 + "score": 0.7107240028283889, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4727805712999679, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.1386688771726747, - "sentence_nr": 1 + "score": 0.7717158158167359, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.44476089284108944, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.1303104091598296, - "sentence_nr": 1 + "score": 0.6551098696198423, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.11487251192182539, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.3838034014383599, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.2758428491606746, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.22090835035926976, - "sentence_nr": 1 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4183007445500922, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.3394587857367724, - "sentence_nr": 1 + "score": 0.6544146882590995, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4183007445500922, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.0008865248226950354, - "sentence_nr": 1 + "score": 0.6544146882590995, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.41682189465797687, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.12451389734392344, - "sentence_nr": 1 + "score": 0.647688351711303, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.06061016244701235, - "sentence_nr": 1 + "score": 0.803154665668484, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.3480533968220821, - "sentence_nr": 1 + "score": 0.8805305626734038, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6838626312597372, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.3107132702855867, - "sentence_nr": 1 + "score": 0.8481552379853444, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.06622410994100032, - "sentence_nr": 1 + "score": 0.803154665668484, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.42506963891617355, - "sentence_nr": 1 + "score": 0.8805305626734038, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.06510536366860005, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.4267595335841956, - "sentence_nr": 1 + "score": 0.4462689092414285, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.0908223691374129, - "sentence_nr": 1 + "score": 0.20323131695812172, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.35086527201687273, - "sentence_nr": 1 + "score": 0.5370679638669973, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.0504505902029893, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.3325735546737679, - "sentence_nr": 1 + "score": 0.46829007045350673, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.1102887395214814, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.4443176362793868, - "sentence_nr": 1 + "score": 0.42359095518407164, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.3452527406212403, - "sentence_nr": 1 + "score": 0.9063898435384111, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.04445259375670958, - "sentence_nr": 1 + "score": 0.23932595221309674, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.3108884123742351, - "sentence_nr": 1 + "score": 0.4673115526141697, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.06793347054687501, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.37328029958450787, - "sentence_nr": 1 + "score": 0.5227131146872793, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2046592065585361, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.0008383635144198525, - "sentence_nr": 1 + "score": 0.5139378364418256, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.10082211195764058, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.39430575805608015, - "sentence_nr": 1 + "score": 0.39020358281213624, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.08459573412751416, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.4172605432414846, - "sentence_nr": 1 + "score": 0.2661828424443392, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.08986406706995408, - "sentence_nr": 1 + "score": 0.165838472529457, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.44470674434718094, - "sentence_nr": 1 + "score": 0.38908651109487247, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.4109749814872678, - "sentence_nr": 1 + "score": 0.3337972903996398, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.12508210748173035, - "sentence_nr": 1 + "score": 0.17411036809769512, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.43052214251110127, - "sentence_nr": 1 + "score": 0.5409636216635109, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4374960951307028, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.15601281434649325, - "sentence_nr": 1 + "score": 0.6840706293465405, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16679551613797314, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.15226733582025143, - "sentence_nr": 1 + "score": 0.4850274766865928, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3025029865727436, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.39627479466482446, - "sentence_nr": 1 + "score": 0.5564009706295315, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.2512105738558467, - "sentence_nr": 1 + "score": 0.4651954337860559, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3764940106481337, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.07135414938965279, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.3801021795977089, - "sentence_nr": 1 + "score": 0.4021117013686505, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.0007433838834374071, - "sentence_nr": 1 + "score": 0.4152914707667959, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.3362234868051281, - "sentence_nr": 1 + "score": 0.36466819017308727, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.07565762629954577, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.3606232238015037, - "sentence_nr": 1 + "score": 0.15942178318590763, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.12594843055469976, - "sentence_nr": 1 + "score": 0.36039626112317097, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.4353555563309006, - "sentence_nr": 1 + "score": 0.5942499629418814, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.07142908588092715, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.3642310370662869, - "sentence_nr": 1 + "score": 0.3078802898940204, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.07679233641842272, - "sentence_nr": 1 + "score": 0.3292010361291119, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.3862824156465965, - "sentence_nr": 1 + "score": 0.6484221669130951, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.06574770404484663, - "sentence_nr": 1 + "score": 0.4246163317880344, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.38086180678047993, - "sentence_nr": 1 + "score": 0.6675494539138593, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.278093559995945, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.13105556640848193, - "sentence_nr": 1 + "score": 0.5759531667584591, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.15600871137617922, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.4717352325952083, - "sentence_nr": 1 + "score": 0.2869208283752505, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.08673909463463786, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.39672072250261947, - "sentence_nr": 1 + "score": 0.3243192696860874, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2229529832462866, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.15095832595105924, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.46999349033543664, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3145998179666375, - "sentence_nr": 1 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.13717476208873386, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4016394464190868, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3186898662502609, - "sentence_nr": 1 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.06656213940646744, - "sentence_nr": 1 + "score": 0.3390387389794623, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3842510919126927, - "sentence_nr": 1 + "score": 0.6170420596680538, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.06946125044973972, - "sentence_nr": 1 + "score": 0.3142665434344143, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.37972229376763555, - "sentence_nr": 1 + "score": 0.6466526067220029, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.06278759018603328, - "sentence_nr": 1 + "score": 0.4216890913810254, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.3835626087751843, - "sentence_nr": 1 + "score": 0.6885217194158456, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.07801890264772814, - "sentence_nr": 1 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.3553195236646342, - "sentence_nr": 1 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.02074180194079426, - "sentence_nr": 1 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.25643534797086653, - "sentence_nr": 1 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.43976907726931086, - "sentence_nr": 1 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.10101113530957895, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.384568799517898, - "sentence_nr": 1 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.31773604252301485, - "sentence_nr": 1 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.23436994609974687, - "sentence_nr": 1 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.18083281963129427, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.056200079175203074, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.3452518222522092, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.15996142821020284, - "sentence_nr": 1 + "score": 0.5088645484558708, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.42995669154818883, - "sentence_nr": 1 + "score": 0.6991726442472661, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.06922310590511903, - "sentence_nr": 1 + "score": 0.4101479464529936, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.39694083278594716, - "sentence_nr": 1 + "score": 0.7041976254287654, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.1339039164909805, - "sentence_nr": 1 + "score": 0.4547900039222725, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.44979655276903346, - "sentence_nr": 1 + "score": 0.6541971428810075, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.22159156633820476, - "sentence_nr": 1 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.47469899099393, - "sentence_nr": 1 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.13857603724877052, - "sentence_nr": 1 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4531342308478503, - "sentence_nr": 1 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.06775586518289999, - "sentence_nr": 1 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.36597017334019843, - "sentence_nr": 1 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.08533222289741706, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.40809636487511675, - "sentence_nr": 1 + "score": 0.8020845125558708, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.11006586190644709, - "sentence_nr": 1 + "score": 0.595092211343687, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.44843770079885176, - "sentence_nr": 1 + "score": 0.7971172820981081, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.11791514636772135, - "sentence_nr": 1 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.45829245019901393, - "sentence_nr": 1 + "score": 0.7122562458056777, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.1384494600711195, - "sentence_nr": 1 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.41953659012152644, - "sentence_nr": 1 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.008635616559445383, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.16760779378610222, - "sentence_nr": 1 + "score": 0.32263864160302524, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4362677560900551, - "sentence_nr": 1 + "score": 0.6824395076981005, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.16234678312329395, - "sentence_nr": 1 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4114313966468408, - "sentence_nr": 1 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.14108777831558816, - "sentence_nr": 1 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.38610201135781486, - "sentence_nr": 1 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.07152747748412269, - "sentence_nr": 1 + "score": 0.38305978177479755, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.38927458491364797, - "sentence_nr": 1 + "score": 0.6061131723054572, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.14163299203710986, - "sentence_nr": 1 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3918120503690342, - "sentence_nr": 1 + "score": 0.58198979036704, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.14163299203710986, - "sentence_nr": 1 + "score": 0.42221847853238736, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3951425639999114, - "sentence_nr": 1 + "score": 0.6656008733100179, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.13959457580667745, - "sentence_nr": 1 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.39278180480046854, - "sentence_nr": 1 + "score": 0.58198979036704, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.212269197708987, - "sentence_nr": 1 + "score": 0.5124776602965491, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.432532095093549, - "sentence_nr": 1 + "score": 0.7722874800637285, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3992714815075659, - "sentence_nr": 1 + "score": 0.7994721822064033, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.07124462426516306, - "sentence_nr": 1 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.41510187108032215, - "sentence_nr": 1 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.16580720845461236, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.4090525704213402, - "sentence_nr": 1 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.08630492424721987, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.3317478812781943, - "sentence_nr": 1 + "score": 0.5512324461754572, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.07256581912261388, - "sentence_nr": 1 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.3778626622264389, - "sentence_nr": 1 + "score": 0.7289444696770301, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0889604331153271, - "sentence_nr": 1 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.4174106361046784, - "sentence_nr": 1 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.35972456016417403, - "sentence_nr": 1 + "score": 0.8215788698315908, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0857750978817917, - "sentence_nr": 1 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.44136113805162547, - "sentence_nr": 1 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.12439394521251071, - "sentence_nr": 1 + "score": 0.29170205300854224, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.48382498181532896, - "sentence_nr": 1 + "score": 0.6498499527552988, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.09453418134278709, - "sentence_nr": 1 + "score": 0.2719326877457978, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4530199895993529, - "sentence_nr": 1 + "score": 0.6002086362682414, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3988248173608407, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4420660206317646, - "sentence_nr": 1 + "score": 0.7121135616759211, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1200100437012302, - "sentence_nr": 1 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.44442321598107215, - "sentence_nr": 1 + "score": 0.7395804946242599, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1651448129209979, - "sentence_nr": 1 + "score": 0.3684981984538114, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5035927049098079, - "sentence_nr": 1 + "score": 0.5606332518476288, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.1057412571039566, - "sentence_nr": 1 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4429339067969458, - "sentence_nr": 1 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.0645756376303251, - "sentence_nr": 1 + "score": 0.3201911827891037, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.42302002481751566, - "sentence_nr": 1 + "score": 0.7182383858693244, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.09402885411258183, - "sentence_nr": 1 + "score": 0.4536404448264584, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4323274751516209, - "sentence_nr": 1 + "score": 0.8020827133708689, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.29764050036303846, - "sentence_nr": 1 + "score": 0.723627810424739, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.2883740704360469, - "sentence_nr": 1 + "score": 0.8084123599808738, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4545091839935173, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.2604470328007762, - "sentence_nr": 1 + "score": 0.7166050399790445, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.267065521919161, - "sentence_nr": 1 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4812700337596407, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.13148666942655857, - "sentence_nr": 1 + "score": 0.7668482135865776, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3370129264673147, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.12243763324014527, - "sentence_nr": 1 + "score": 0.7096874943799061, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.06752080860674345, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3920675082035874, - "sentence_nr": 1 + "score": 0.12648351910430983, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19910401453355991, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.26975886482101524, - "sentence_nr": 1 + "score": 0.5815343547138478, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1624355752882384, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.23716589277972214, - "sentence_nr": 1 + "score": 0.4952968469712617, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3766019021279213, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.25934628189580383, - "sentence_nr": 1 + "score": 0.7318674193893624, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.37489047453628294, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.14394171731439506, - "sentence_nr": 1 + "score": 0.7155230965848066, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.25058279102061404, - "sentence_nr": 1 + "score": 0.7807505267551733, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.2750949112536697, - "sentence_nr": 1 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.369396410785335, - "sentence_nr": 1 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5595205105615875, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.30886876402238045, - "sentence_nr": 1 + "score": 0.8322210048001876, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.06639671070426982, - "sentence_nr": 1 + "score": 0.3060368950930089, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4050079022771937, - "sentence_nr": 1 + "score": 0.6834837188844622, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3942058093215873, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.32906029723232294, - "sentence_nr": 1 + "score": 0.6697898834930974, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3142665434344143, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.027675420219657812, - "sentence_nr": 1 + "score": 0.6466526067220029, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.16631693106339326, - "sentence_nr": 1 + "score": 0.1973212456326944, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.43025731672242257, - "sentence_nr": 1 + "score": 0.4151043049244464, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.2799307164966019, - "sentence_nr": 1 + "score": 0.49125115898082056, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.1910051568962051, - "sentence_nr": 1 + "score": 0.49125115898082056, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.31549317456416015, - "sentence_nr": 1 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.15854552704770836, - "sentence_nr": 1 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.30382216559902564, - "sentence_nr": 1 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6734648419604768, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.3445489778722215, - "sentence_nr": 1 + "score": 0.7694606959147566, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.22034235744543199, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8578928092681435, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.3992343412084987, - "sentence_nr": 1 + "score": 0.9422733087334002, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.30041915229862387, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3768445224292385, - "sentence_nr": 1 + "score": 0.5110381669871915, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3029170225422197, - "sentence_nr": 1 + "score": 0.6750223515189266, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.14327475199246492, - "sentence_nr": 1 + "score": 0.42818224355402373, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4803501444747088, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4396910566965852, - "sentence_nr": 1 + "score": 0.7417101158248365, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.38687573986922297, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.18103783785102034, - "sentence_nr": 1 + "score": 0.6514359547109982, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42105372680687736, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.33182702236316497, - "sentence_nr": 1 + "score": 0.7001171094008295, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.1275824962151066, - "sentence_nr": 1 + "score": 0.3865584077322271, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.36765996925422534, - "sentence_nr": 1 + "score": 0.7076640192892537, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.07319259674455142, - "sentence_nr": 1 + "score": 0.3865584077322271, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3270299166479025, - "sentence_nr": 1 + "score": 0.7076640192892537, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3689308416952914, - "sentence_nr": 1 + "score": 0.8110307349404526, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.12454093367377822, - "sentence_nr": 1 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.45400457519342263, - "sentence_nr": 1 + "score": 0.7121135616759211, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.29211251612445716, - "sentence_nr": 1 + "score": 0.7395804946242599, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.08742637130044478, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3782754387193616, - "sentence_nr": 1 + "score": 0.527528099078667, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.23972125922151485, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4276317128610827, - "sentence_nr": 1 + "score": 0.6266330371317139, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.08016111055639634, - "sentence_nr": 1 + "score": 0.42195777059677314, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4056564162743549, - "sentence_nr": 1 + "score": 0.7076271819674439, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42195777059677314, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.3471416148922459, - "sentence_nr": 1 + "score": 0.7128603669502883, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20390514683548702, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4473497453896118, - "sentence_nr": 1 + "score": 0.6152907875442002, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42062888241722096, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.31639773530374476, - "sentence_nr": 1 + "score": 0.6813469636986809, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.26469697944333787, - "sentence_nr": 1 + "score": 0.2600960555023324, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.46740195549409447, - "sentence_nr": 1 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.062198321250135094, - "sentence_nr": 1 + "score": 0.3942058093215873, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3837740336383876, - "sentence_nr": 1 + "score": 0.6294033705157869, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.07506636604929029, - "sentence_nr": 1 + "score": 0.3942058093215873, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3797922048220493, - "sentence_nr": 1 + "score": 0.6452705345581219, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.3843618124722185, - "sentence_nr": 1 + "score": 0.4909101855057947, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.10567309578898446, - "sentence_nr": 1 + "score": 0.39545121937832856, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4024349171516437, - "sentence_nr": 1 + "score": 0.6541357656856408, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.32603788130544104, - "sentence_nr": 1 + "score": 0.43281826407421803, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.1626633565563655, - "sentence_nr": 1 + "score": 0.1974694070034893, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.16242124033839386, - "sentence_nr": 1 + "score": 0.786096406361039, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4787974949414673, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.14377784410436356, - "sentence_nr": 1 + "score": 0.713332477096005, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.12860232766612728, - "sentence_nr": 1 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4883034327593629, - "sentence_nr": 1 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4175745705628701, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.26376811155857266, - "sentence_nr": 1 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0683913918538176, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.31964124359750967, - "sentence_nr": 1 + "score": 0.6576054208318073, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.16961420464787738, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4536404448264584, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.3243082758151494, - "sentence_nr": 1 + "score": 0.8020827133708689, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.13894512516215204, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.483078120317575, - "sentence_nr": 1 + "score": 0.24237768532177115, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.0832724096908118, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.4097982251907115, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.09843551021314972, - "sentence_nr": 1 + "score": 0.39469536234609737, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.44345815368179514, - "sentence_nr": 1 + "score": 0.5917048915180981, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.14797957986848845, - "sentence_nr": 1 + "score": 0.15824382329465247, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.513739080081145, - "sentence_nr": 1 + "score": 0.4020801848996587, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.19557790190470636, - "sentence_nr": 1 + "score": 0.1835554260049945, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.5363764564720104, - "sentence_nr": 1 + "score": 0.4427324890847145, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.39516134977471445, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.13548943675142955, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.46561819719767894, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.06222483146893852, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.3832766336265944, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1264407220858752, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.42491280666939946, - "sentence_nr": 1 + "score": 0.2491316630275714, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.13488308637805477, - "sentence_nr": 1 + "score": 0.20312835120509382, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.44243592943016613, - "sentence_nr": 1 + "score": 0.4762668365393059, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.17119491085533964, - "sentence_nr": 1 + "score": 0.3407563025626974, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.48716868178651374, - "sentence_nr": 1 + "score": 0.6104226554223803, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.13564915029310812, - "sentence_nr": 1 + "score": 0.6407363191582277, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5124402132264054, - "sentence_nr": 1 + "score": 0.6966460917682386, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.15357179047039304, - "sentence_nr": 1 + "score": 0.2719326877457978, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.4395965605263733, - "sentence_nr": 1 + "score": 0.5963825614997932, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5595205105615875, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.3352216651363677, - "sentence_nr": 1 + "score": 0.8322210048001876, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5595205105615875, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.2926736955448575, - "sentence_nr": 1 + "score": 0.8322210048001876, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.33910710471992317, - "sentence_nr": 1 + "score": 0.4770529960418919, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19268479640608693, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.3068634134265278, - "sentence_nr": 1 + "score": 0.49975293173596386, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15083364266523736, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.21066917744439353, - "sentence_nr": 1 + "score": 0.4907822977105627, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.11319164831477802, - "sentence_nr": 1 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.41093314858673247, - "sentence_nr": 1 + "score": 0.723627810424739, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.3662862822569537, - "sentence_nr": 1 + "score": 0.7980971476599384, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.8084123599808738, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2765896733581188, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.400596854878379, - "sentence_nr": 1 + "score": 0.5826805982089127, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.43483587481573205, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3536122668781117, - "sentence_nr": 1 + "score": 0.6723935384652386, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.27080524311589804, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3877466554678465, - "sentence_nr": 1 + "score": 0.5735629822442805, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.058474735537506775, - "sentence_nr": 1 + "score": 0.21576146358278564, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.346711996349685, - "sentence_nr": 1 + "score": 0.5673560872668851, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39545121937832856, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.32876484301179987, - "sentence_nr": 1 + "score": 0.636466558635705, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.14849717699290216, - "sentence_nr": 1 + "score": 0.21576146358278564, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.4416362145529488, - "sentence_nr": 1 + "score": 0.5673560872668851, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.11642798279128005, - "sentence_nr": 1 + "score": 0.38513414673376833, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.439412293625208, - "sentence_nr": 1 + "score": 0.7005713730032203, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.11467045422639609, - "sentence_nr": 1 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.4118325813270988, - "sentence_nr": 1 + "score": 0.786096406361039, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.04984963984762715, - "sentence_nr": 1 + "score": 0.38513414673376833, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.34056245241432903, - "sentence_nr": 1 + "score": 0.7005713730032203, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.10648557917276309, - "sentence_nr": 1 + "score": 0.11385032360134208, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.4293616798847581, - "sentence_nr": 1 + "score": 0.4382795902467684, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.1224748001318708, - "sentence_nr": 1 + "score": 0.1365189729052536, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.41768426353539356, - "sentence_nr": 1 + "score": 0.4259340541380412, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2815871636550668, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.08812222855524378, - "sentence_nr": 1 + "score": 0.2250861242438523, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.3956725101287399, - "sentence_nr": 1 + "score": 0.5618434465935181, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3238579233802238, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.276493585243019, - "sentence_nr": 1 + "score": 0.6198368821194998, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.35438832887300664, - "sentence_nr": 1 + "score": 0.512336215207795, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.0862684017016977, - "sentence_nr": 1 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.46311606179286086, - "sentence_nr": 1 + "score": 0.8131857452490882, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.08054744999594665, - "sentence_nr": 1 + "score": 0.5124776602965491, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.3910533825433727, - "sentence_nr": 1 + "score": 0.6265447017943011, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6730489965212471, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.42292922955918455, - "sentence_nr": 1 + "score": 0.7670434817254471, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.08905416987582906, - "sentence_nr": 1 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.4634600994908148, - "sentence_nr": 1 + "score": 0.6807294776537712, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.09910184808869367, - "sentence_nr": 1 + "score": 0.39469536234609737, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.4062669521282858, - "sentence_nr": 1 + "score": 0.6937261271262425, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.60585154759089, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.15324215252205037, - "sentence_nr": 1 + "score": 0.7547619819808454, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4546414930698417, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.13705597155185278, - "sentence_nr": 1 + "score": 0.4812700337596407, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.42086210633701837, - "sentence_nr": 1 + "score": 0.6942705518980387, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42195777059677314, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4018106536047614, - "sentence_nr": 1 + "score": 0.6687475942312653, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.10183633383410676, - "sentence_nr": 1 + "score": 0.435949382480739, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.4751488277953516, - "sentence_nr": 1 + "score": 0.7673284019128814, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.435949382480739, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.08115687813068284, - "sentence_nr": 1 + "score": 0.7335705336375569, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.1402219074856109, - "sentence_nr": 1 + "score": 0.29715678881302643, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.45645548689619725, - "sentence_nr": 1 + "score": 0.661467129406907, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4464617303464354, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.14370950122782516, - "sentence_nr": 1 + "score": 0.7099628979634083, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3843832649911012, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.3334514572265135, - "sentence_nr": 1 + "score": 0.6360002062017179, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3675667565747676, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.21268091254698024, - "sentence_nr": 1 + "score": 0.5575264207911254, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42221847853238736, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.1520044508572736, - "sentence_nr": 1 + "score": 0.7096175474139502, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2033897418920923, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.14754324776249525, - "sentence_nr": 1 + "score": 0.551556930942916, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.42062888241722096, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.1493596720301927, - "sentence_nr": 1 + "score": 0.6825498124526633, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.13418234666119208, - "sentence_nr": 1 + "score": 0.31771674795486515, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.42515671066046573, - "sentence_nr": 1 + "score": 0.6823632455739186, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5397323593778651, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.16498388417305654, - "sentence_nr": 1 + "score": 0.8110662878512482, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.786096406361039, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.348862726257721, - "sentence_nr": 1 + "score": 0.38918346804460413, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.21644311639014951, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.15101184486338365, - "sentence_nr": 1 + "score": 0.4550086560720594, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.15175485095994987, - "sentence_nr": 1 + "score": 0.33188010562448456, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2919394073770869, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.12067839739874531, - "sentence_nr": 1 + "score": 0.6265777781732258, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.3233637515119462, - "sentence_nr": 1 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.31176467991525436, - "sentence_nr": 1 + "score": 0.40982224146042756, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48156738796358634, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.29400349942844667, - "sentence_nr": 1 + "score": 0.7671994551643374, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.17262205720154453, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.7590994812356263, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.1656224349286263, - "sentence_nr": 1 + "score": 0.857390040146912, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.08879506158981211, - "sentence_nr": 1 + "score": 0.3113612721440885, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.347971425272793, - "sentence_nr": 1 + "score": 0.6244179228679348, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19857943409196785, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.25467468537876675, - "sentence_nr": 1 + "score": 0.4841638348150365, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.24456656109396324, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.23230258637043677, - "sentence_nr": 1 + "score": 0.502026173233975, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.3038489261116855, - "sentence_nr": 1 + "score": 0.1427404270947385, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3766019021279213, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.021201194751007294, - "sentence_nr": 1 + "score": 0.6426846682861654, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.29393541924621686, - "sentence_nr": 1 + "score": 0.3767656346408826, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.08160236983918483, - "sentence_nr": 1 + "score": 0.33807764768133375, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.38567653709947824, - "sentence_nr": 1 + "score": 0.71426422535372, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.44701617851855957, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.3636314071779547, - "sentence_nr": 1 + "score": 0.8047183456557263, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.07352808725672978, - "sentence_nr": 1 + "score": 0.421151249507493, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.39849401484916575, - "sentence_nr": 1 + "score": 0.7602108728496834, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.1601444942465269, - "sentence_nr": 1 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.4576200820848031, - "sentence_nr": 1 + "score": 0.7392285437932827, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.07609797325833854, - "sentence_nr": 1 + "score": 0.3062859135460401, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.3951828103961763, - "sentence_nr": 1 + "score": 0.6540898825644205, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.3304089312918686, - "sentence_nr": 1 + "score": 0.7392285437932827, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.4573525700324587, - "sentence_nr": 1 + "score": 0.4356925719771587, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.09226934981186162, - "sentence_nr": 1 + "score": 0.16246736614250729, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.40757362998605645, - "sentence_nr": 1 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", + "metric": "chrf", + "score": 0.38605159790728016, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.0736975215393763, - "sentence_nr": 1 + "score": 0.3026944877822123, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.450470065659465, - "sentence_nr": 1 + "score": 0.8084123599808738, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.3454543703976745, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.30752616970214336, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.39175069197918183, - "sentence_nr": 1 + "score": 0.662093020699087, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.0948243550653547, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.40688622415675096, - "sentence_nr": 1 + "score": 0.511075227027215, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.37941443386230733, - "sentence_nr": 1 + "score": 0.5741842828404965, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.0754791629755296, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.4115037991203147, - "sentence_nr": 1 + "score": 0.5335784441425054, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.1444984020870621, - "sentence_nr": 1 + "score": 0.32263864160302524, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.4485053623705821, - "sentence_nr": 1 + "score": 0.6529241277890402, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.14266515060023502, - "sentence_nr": 1 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.4205762044754387, - "sentence_nr": 1 + "score": 0.7410529316463808, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.27437048069985176, - "sentence_nr": 1 + "score": 0.7638521785649908, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.1062877417466757, - "sentence_nr": 1 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.38750616531019433, - "sentence_nr": 1 + "score": 0.73719964992947, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0622376426945986, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.3214294905599617, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.09850470636402667, - "sentence_nr": 1 + "score": 0.4797543511401896, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.38526614122005187, - "sentence_nr": 1 + "score": 0.7030838074817461, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.435949382480739, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.3759239923327034, - "sentence_nr": 1 + "score": 0.7673284019128814, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.3693281437350684, - "sentence_nr": 1 + "score": 0.7199991365237522, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.1257482552973572, - "sentence_nr": 1 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.396228613282852, - "sentence_nr": 1 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.17376142320673926, - "sentence_nr": 1 + "score": 0.17855149299161596, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4494840281694199, - "sentence_nr": 1 + "score": 0.5203115480779714, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.09628144140511948, - "sentence_nr": 1 + "score": 0.17855149299161596, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.3988415038006601, - "sentence_nr": 1 + "score": 0.5366596515222662, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.08810203169380636, - "sentence_nr": 1 + "score": 0.1832567180568652, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4085631076024389, - "sentence_nr": 1 + "score": 0.46874652173038095, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.10152372886902537, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.3274648018276542, - "sentence_nr": 1 + "score": 0.5019033159973346, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.0656946802681167, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.39145078362684715, - "sentence_nr": 1 + "score": 0.3882810705699302, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.22759795317133963, - "sentence_nr": 1 + "score": 0.3200586334957503, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.404462271481354, - "sentence_nr": 1 + "score": 0.17020807300741128, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.08828528448332856, - "sentence_nr": 1 + "score": 0.3763693611344683, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.3845635683167561, - "sentence_nr": 1 + "score": 0.6360504215730572, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4909326710993637, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.118369067191548, - "sentence_nr": 1 + "score": 0.20760470031302655, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.3570955668970739, - "sentence_nr": 1 + "score": 0.42791815571433417, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.413948387915005, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.24782633328770076, - "sentence_nr": 1 + "score": 0.6536628131390233, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.1056222479945408, - "sentence_nr": 1 + "score": 0.5391491945473402, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.4147493978520222, - "sentence_nr": 1 + "score": 0.703591887429203, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17202650214787163, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.3490597215692333, - "sentence_nr": 1 + "score": 0.3503510714510492, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.363944181125048, - "sentence_nr": 1 + "score": 0.39058393006987374, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16261055653267345, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.3406214634850959, - "sentence_nr": 1 + "score": 0.3574935801968696, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.36559164703469177, - "sentence_nr": 1 + "score": 0.16405809898807555, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.21644311639014951, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.34592988636867744, - "sentence_nr": 1 + "score": 0.5575527454538532, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.24248545140243574, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.2503407008316354, - "sentence_nr": 1 + "score": 0.5768340234336301, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.18358987342790867, - "sentence_nr": 1 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.47842211510000643, - "sentence_nr": 1 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39569555015790975, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.40917168792265945, - "sentence_nr": 1 + "score": 0.6841500930430788, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.45653838513939016, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.2907747554493043, - "sentence_nr": 1 + "score": 0.5463837424085701, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.37948468164443433, - "sentence_nr": 1 + "score": 0.8084123599808738, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2076047003130265, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.23231306174211885, - "sentence_nr": 1 + "score": 0.5791447789263454, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2821801681960571, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.29313442741821516, - "sentence_nr": 1 + "score": 0.5343486909870273, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.15565663466238167, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.45102089786807525, - "sentence_nr": 1 + "score": 0.19408883848117267, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.38305978177479755, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.3349337342755207, - "sentence_nr": 1 + "score": 0.6457837185727413, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.19306612958933164, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.4566094829965023, - "sentence_nr": 1 + "score": 0.2764205123105664, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.08247696970055073, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4056732117408629, - "sentence_nr": 1 + "score": 0.5109316705796892, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.08599019784098516, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.38046480599975824, - "sentence_nr": 1 + "score": 0.5109316705796892, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.0256345300045686, - "sentence_nr": 1 + "score": 0.5109316705796892, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.43183586900957266, - "sentence_nr": 1 + "score": 0.5370788574666518, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.3063541349224814, - "sentence_nr": 1 + "score": 0.45798136636926595, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.11947870588075608, - "sentence_nr": 1 + "score": 0.18814785746917081, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.3759204930158301, - "sentence_nr": 1 + "score": 0.5307880463310148, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1582866049832572, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.30820582392513496, - "sentence_nr": 1 + "score": 0.34487142413575794, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17905278399134197, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.24438387922480115, - "sentence_nr": 1 + "score": 0.37257295447029826, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.09603814203629989, - "sentence_nr": 1 + "score": 0.15521606028436608, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4321181025319477, - "sentence_nr": 1 + "score": 0.37645329404497957, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.08860973467526746, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.15165087037620367, - "sentence_nr": 1 + "score": 0.3178004360288637, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1418524086391329, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.10988031996776393, - "sentence_nr": 1 + "score": 0.38295770773758747, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15268019045355535, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.14384707977041108, - "sentence_nr": 1 + "score": 0.41028757620299977, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19074380068002203, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.15154293554201603, - "sentence_nr": 1 + "score": 0.40566585096277824, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17382347640129553, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.15332196439486498, - "sentence_nr": 1 + "score": 0.4061580777885601, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.13868172938464635, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.1366803905961902, - "sentence_nr": 1 + "score": 0.3094469764260441, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22381487678101888, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.26639298093015656, - "sentence_nr": 1 + "score": 0.5249370100068887, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.09431297723472011, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.22666622746450207, - "sentence_nr": 1 + "score": 0.3616856339096348, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.11091252683001185, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.26607634610445896, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10666682719585797, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.12132942020746357, - "sentence_nr": 1 + "score": 0.33462901494141756, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.14557808399334188, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.13870031722176082, - "sentence_nr": 1 + "score": 0.36598346755702993, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18154954789336694, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.14848492233400512, - "sentence_nr": 1 + "score": 0.4557483776072868, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20198948917565754, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.17231483245958562, - "sentence_nr": 1 + "score": 0.34858221035657466, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16780109158842918, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.26083297460286664, - "sentence_nr": 1 + "score": 0.3968694014697679, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1381751568911733, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.2205187870837211, - "sentence_nr": 1 + "score": 0.3121557499162649, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22669486951066523, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.1680732599075483, - "sentence_nr": 1 + "score": 0.4484451941575473, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11697642623186386, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.1612742953037833, - "sentence_nr": 1 + "score": 0.37117753637984835, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20065115069964384, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.09719895896945802, - "sentence_nr": 1 + "score": 0.4084885616013531, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.13288058909850656, - "sentence_nr": 1 + "score": 0.09916009482330297, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.423605439146263, - "sentence_nr": 1 + "score": 0.3032928217006101, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.13805615693046389, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.38162976993876024, - "sentence_nr": 1 + "score": 0.40787998733941394, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.12291219097556666, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3448002180666873, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.04281165799178987, - "sentence_nr": 1 + "score": 0.09478705591775652, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.2509007123366836, - "sentence_nr": 1 + "score": 0.33293232395887284, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19148282873929853, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.20795243503763636, - "sentence_nr": 1 + "score": 0.4707949702068854, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20608572305725564, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.14465863776176832, - "sentence_nr": 1 + "score": 0.4704943905570542, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0937099995586274, - "sentence_nr": 1 + "score": 0.14057105892389254, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.38638693017010634, - "sentence_nr": 1 + "score": 0.3028381427383384, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.06087893264282183, - "sentence_nr": 1 + "score": 0.12157241570357182, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.30404764547641244, - "sentence_nr": 1 + "score": 0.4080990097991491, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.085416483900781, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.41200704988717746, - "sentence_nr": 1 + "score": 0.2825804066750608, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.13635319583999642, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.3886887515710143, - "sentence_nr": 1 + "score": 0.2850432830231861, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.14004327215895437, - "sentence_nr": 1 + "score": 0.16431887969160053, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.40026907984156535, - "sentence_nr": 1 + "score": 0.4088971379214799, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11452508920842025, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.15629747573563804, - "sentence_nr": 1 + "score": 0.3212742401272785, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.06070088845782673, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3486357126192648, - "sentence_nr": 1 + "score": 0.2584364364927186, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15184278721506198, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.3714248697825236, - "sentence_nr": 1 + "score": 0.4093399937921707, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.04255423670382886, - "sentence_nr": 1 + "score": 0.15478222669012726, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.2545212986668611, - "sentence_nr": 1 + "score": 0.3550584759508654, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1054433514098504, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.3669589828288568, - "sentence_nr": 1 + "score": 0.2840946641780818, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16758563722627876, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.2877294168054545, - "sentence_nr": 1 + "score": 0.4598125962895632, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.048298770203824865, - "sentence_nr": 1 + "score": 0.07875433150726119, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.356719915230056, - "sentence_nr": 1 + "score": 0.2638954513805452, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.109333912337143, - "sentence_nr": 1 + "score": 0.12768613576122964, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.43946533504329827, - "sentence_nr": 1 + "score": 0.3279857505284436, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18041700926694673, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.3875031655866923, - "sentence_nr": 1 + "score": 0.43852448917973136, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.14527200081334513, - "sentence_nr": 1 + "score": 0.10734088848154077, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.4245172781893951, - "sentence_nr": 1 + "score": 0.33946796348247366, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.14491394634037813, - "sentence_nr": 1 + "score": 0.16136987880724096, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.42283276725533414, - "sentence_nr": 1 + "score": 0.33626920748765377, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.10635098896649771, - "sentence_nr": 1 + "score": 0.15197436941722972, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.43764629149971646, - "sentence_nr": 1 + "score": 0.37271000364127155, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17795920517030017, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3774972710926166, - "sentence_nr": 1 + "score": 0.41862955401967455, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.10434084599663213, - "sentence_nr": 1 + "score": 0.13230039635238258, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.44542543341868346, - "sentence_nr": 1 + "score": 0.3269392904147474, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.13212441564006142, - "sentence_nr": 1 + "score": 0.20266988583156875, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4851083447910682, - "sentence_nr": 1 + "score": 0.48179719155586864, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.08158761703149583, - "sentence_nr": 1 + "score": 0.19388048412249795, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.41046961809624866, - "sentence_nr": 1 + "score": 0.44361702376789247, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.11015904355748653, - "sentence_nr": 1 + "score": 0.17544176680792672, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.44168399257521335, - "sentence_nr": 1 + "score": 0.3246583081139427, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17337747588904887, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.38182147212430423, - "sentence_nr": 1 + "score": 0.3602154895924569, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.08774452514246735, - "sentence_nr": 1 + "score": 0.1237012344369667, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.45281303506820253, - "sentence_nr": 1 + "score": 0.33331866832253354, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.12099786399361606, - "sentence_nr": 1 + "score": 0.09199306870423013, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4447762461237164, - "sentence_nr": 1 + "score": 0.22163854171424513, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20923298022634812, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.35906265614758676, - "sentence_nr": 1 + "score": 0.4391664941823773, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.10096459770150681, - "sentence_nr": 1 + "score": 0.15589802574348086, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4189740217714419, - "sentence_nr": 1 + "score": 0.37894206802233305, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.22294850195164284, - "sentence_nr": 1 + "score": 0.06089987261870556, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5198779161956808, - "sentence_nr": 1 + "score": 0.2933161562815446, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.05462476108190564, - "sentence_nr": 1 + "score": 0.13303798096767047, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3939806692426178, - "sentence_nr": 1 + "score": 0.3647236067340644, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1948502778967486, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.29176887749452535, - "sentence_nr": 1 + "score": 0.35525815981538433, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.12489813745855237, - "sentence_nr": 1 + "score": 0.14391826157279944, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.45100840448088525, - "sentence_nr": 1 + "score": 0.3239832814361818, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.17572052479780473, - "sentence_nr": 1 + "score": 0.1769688060281599, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4667387769636358, - "sentence_nr": 1 + "score": 0.43047038034793145, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1618333627385132, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4278609837970672, - "sentence_nr": 1 + "score": 0.3458746996740858, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10027955093430833, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.43910094482734396, - "sentence_nr": 1 + "score": 0.3650503321876689, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.171833798351082, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.3506155502407636, - "sentence_nr": 1 + "score": 0.4265037420578645, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.14055652045127187, - "sentence_nr": 1 + "score": 0.17393111207515277, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.473241590986821, - "sentence_nr": 1 + "score": 0.39042812195808824, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.15711076787374778, - "sentence_nr": 1 + "score": 0.15998665872195003, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.4926610996660017, - "sentence_nr": 1 + "score": 0.35681333217176553, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.12253628106911543, - "sentence_nr": 1 + "score": 0.22922072303609867, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.41098604819939544, - "sentence_nr": 1 + "score": 0.5075702211165173, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.08517707813747888, - "sentence_nr": 1 + "score": 0.19064689695123957, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.4298965032520897, - "sentence_nr": 1 + "score": 0.36954921822756504, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.1822605494174467, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4674281825701334, - "sentence_nr": 1 + "score": 0.3141794892548087, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.15060224138362507, - "sentence_nr": 1 + "score": 0.11397867508647329, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4960433081987429, - "sentence_nr": 1 + "score": 0.4390501380282409, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.06416670621115099, - "sentence_nr": 1 + "score": 0.1785851272602057, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.37914976744210205, - "sentence_nr": 1 + "score": 0.3800733399524004, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.16678437441301863, - "sentence_nr": 1 + "score": 0.20219794591777904, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.48024716052007455, - "sentence_nr": 1 + "score": 0.4267163836239083, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.19769254283956292, - "sentence_nr": 1 + "score": 0.18851320324917495, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5134092833700632, - "sentence_nr": 1 + "score": 0.4118109845203767, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.10351511568904229, - "sentence_nr": 1 + "score": 0.20113943179758872, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.3512771276108106, - "sentence_nr": 1 + "score": 0.5054929215592371, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.12894382376610666, - "sentence_nr": 1 + "score": 0.16558784557611658, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4774936497378186, - "sentence_nr": 1 + "score": 0.4099467657500184, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.06638301361073934, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.2822733965937896, - "sentence_nr": 1 + "score": 0.33617826818768626, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.18568750775369716, - "sentence_nr": 1 + "score": 0.07088281524771703, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5053829905789087, - "sentence_nr": 1 + "score": 0.1725752257112697, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.13594665641498668, - "sentence_nr": 1 + "score": 0.09337623404557584, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.47621282367548656, - "sentence_nr": 1 + "score": 0.255517984683644, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.12066287439499573, - "sentence_nr": 1 + "score": 0.23272041020266335, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.40225318320388664, - "sentence_nr": 1 + "score": 0.5347837552430531, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.16231893029395061, - "sentence_nr": 1 + "score": 0.11901413329120636, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.4858308027555531, - "sentence_nr": 1 + "score": 0.2908877283991857, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.13721346938175555, - "sentence_nr": 1 + "score": 0.11889226114628741, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4615942596492787, - "sentence_nr": 1 + "score": 0.27239589447707985, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.07617542321559437, - "sentence_nr": 1 + "score": 0.15964995175974525, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.39858919565540674, - "sentence_nr": 1 + "score": 0.3581305879558541, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.07090251712658449, - "sentence_nr": 1 + "score": 0.15593857496482408, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.35520940942408, - "sentence_nr": 1 + "score": 0.3832822126692406, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.14262794392495703, - "sentence_nr": 1 + "score": 0.1101341452220285, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5232138614315397, - "sentence_nr": 1 + "score": 0.27185902677547247, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.16700393857092563, - "sentence_nr": 1 + "score": 0.14086598242600956, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4647429119227333, - "sentence_nr": 1 + "score": 0.36137008859982034, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.21107720643690867, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.43911506176829573, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.11244423294201641, - "sentence_nr": 1 + "score": 0.15873502699291203, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.39638666871963296, - "sentence_nr": 1 + "score": 0.37163016195847015, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16084008820568224, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.044924326658115875, - "sentence_nr": 1 + "score": 0.49453446122836875, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.13664244796691394, - "sentence_nr": 1 + "score": 0.13827947882974537, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.43278158199649547, - "sentence_nr": 1 + "score": 0.4010585707766239, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1046814649445003, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3781094023262652, - "sentence_nr": 1 + "score": 0.35929100435813716, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.052244516140907096, - "sentence_nr": 1 + "score": 0.1605078796467662, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.351436961102141, - "sentence_nr": 1 + "score": 0.3607284417441162, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.054452721416611755, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.26865126568707876, - "sentence_nr": 1 + "score": 0.17786113214625052, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.12832094336767122, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.13407128770012228, - "sentence_nr": 1 + "score": 0.3205905925059277, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.25513503948815797, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.1426346476984381, - "sentence_nr": 1 + "score": 0.5313546358608554, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.13788203160207568, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.10415314128924848, - "sentence_nr": 1 + "score": 0.3107655646435926, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.1266453888967545, - "sentence_nr": 1 + "score": 0.11215442765734894, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.43624812371685906, - "sentence_nr": 1 + "score": 0.31037546676327293, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1634286453435278, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3105036235047128, - "sentence_nr": 1 + "score": 0.3969874268891194, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.19418939219609221, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3901489832573322, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10180741374280794, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.30259031153099203, - "sentence_nr": 1 + "score": 0.35728563956947634, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1945423193070673, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.000862663906142167, - "sentence_nr": 1 + "score": 0.4706222268986097, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2080824447945289, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.1494595377548235, - "sentence_nr": 1 + "score": 0.4427653693519822, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.06928783103636403, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.17419420900027405, - "sentence_nr": 1 + "score": 0.28222804846457444, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.07291105107725455, - "sentence_nr": 1 + "score": 0.1334077033965181, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4474906527730671, - "sentence_nr": 1 + "score": 0.3538599860218621, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.08334085822278188, - "sentence_nr": 1 + "score": 0.12768405545127823, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.40282723830388284, - "sentence_nr": 1 + "score": 0.27604666315815635, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.16487909586055954, - "sentence_nr": 1 + "score": 0.15282474172999858, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.44274512789286224, - "sentence_nr": 1 + "score": 0.3332497402594901, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.1360307408435953, - "sentence_nr": 1 + "score": 0.15757286670204007, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.39263095823622246, - "sentence_nr": 1 + "score": 0.3806769190392542, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.05441383188454176, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.09480078705006485, - "sentence_nr": 1 + "score": 0.24018232621879906, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10490012364788723, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.44714457710624717, - "sentence_nr": 1 + "score": 0.33660463773341737, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.09262646486676755, - "sentence_nr": 1 + "score": 0.11899511888513169, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4550476833458679, - "sentence_nr": 1 + "score": 0.4352142502284442, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.048201474120811695, - "sentence_nr": 1 + "score": 0.12579975046393374, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.274690061108597, - "sentence_nr": 1 + "score": 0.404341788822506, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.11701862696300212, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.46140785066979895, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.9087119657256413, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.09775507836117996, - "sentence_nr": 1 + "score": 0.9560455759500431, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.16354663154362192, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.130164515743941, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.33037125702748205, - "sentence_nr": 1 + "score": 0.34629605922390666, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.08714838249931423, - "sentence_nr": 1 + "score": 0.16831858516266504, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.3692825189624461, - "sentence_nr": 1 + "score": 0.4055923540305375, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.09127577115927074, - "sentence_nr": 1 + "score": 0.10548435635950038, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.3300669374207929, - "sentence_nr": 1 + "score": 0.3216875314121515, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.03878009660847357, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.09136190831662618, - "sentence_nr": 1 + "score": 0.18823804107102407, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1048945915828233, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.13220679623213535, - "sentence_nr": 1 + "score": 0.2682365472098696, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, + "score": 0.06705104400125081, + "sentence_nr": 7 + }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.11092163750960961, - "sentence_nr": 1 + "score": 0.10364481526694266, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.09283959660667528, - "sentence_nr": 1 + "score": 0.14245697322261636, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.3978483671635258, - "sentence_nr": 1 + "score": 0.31264665723789214, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11764506340202335, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.32927950263936856, - "sentence_nr": 1 + "score": 0.33221255414061107, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.03174814557417323, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.10056314185255186, - "sentence_nr": 1 + "score": 0.22677038600489324, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.06992171867383007, - "sentence_nr": 1 + "score": 0.11813127408984, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.3707212650272349, - "sentence_nr": 1 + "score": 0.33360638121733993, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.12452426344763672, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.0008790436005625879, - "sentence_nr": 1 + "score": 0.33397340303561174, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19483286033292496, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.13930120799883589, - "sentence_nr": 1 + "score": 0.3818604583347797, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18262512815371146, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.3231709973877731, - "sentence_nr": 1 + "score": 0.3793372141796691, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.20118989409590474, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.1680802224904863, - "sentence_nr": 1 + "score": 0.4568193159296443, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.08434660455803612, - "sentence_nr": 1 + "score": 0.1463728853883045, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.32335639685468925, - "sentence_nr": 1 + "score": 0.31232598846995213, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.18601317791265554, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.1634625788420781, - "sentence_nr": 1 + "score": 0.3622769931215066, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10134296554489586, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.16025807894446958, - "sentence_nr": 1 + "score": 0.36352625085241486, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.056826643919713225, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.18409427327298736, - "sentence_nr": 1 + "score": 0.2266491488847452, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.04649593117719446, - "sentence_nr": 1 + "score": 0.11586860285558973, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3123287906475391, - "sentence_nr": 1 + "score": 0.34426859851631064, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11537817464773759, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3333615125436495, - "sentence_nr": 1 + "score": 0.3235971362772825, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.05397266389085381, - "sentence_nr": 1 + "score": 0.06766231174629671, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.3006666778870249, - "sentence_nr": 1 + "score": 0.2969445272858798, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15459585401418227, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.2211338091172957, - "sentence_nr": 1 + "score": 0.3893244765025937, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.14329939975046438, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.13172601051893773, - "sentence_nr": 1 + "score": 0.4108923582650918, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.074972966125329, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.31543864697695867, - "sentence_nr": 1 + "score": 0.24626763305506796, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.909878624371155, - "sentence_nr": 1 + "score": 0.14039526843208108, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.9494599978334789, - "sentence_nr": 1 + "score": 0.32365712589934936, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.6484538568755306, - "sentence_nr": 1 + "score": 0.17760506260243636, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.8387015535622947, - "sentence_nr": 1 + "score": 0.3876268199476132, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.9625248317849852, - "sentence_nr": 1 + "score": 0.11133338686962291, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.9799603794887166, - "sentence_nr": 1 + "score": 0.35573855512815966, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.39650106263626994, - "sentence_nr": 1 + "score": 0.03901477466571775, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.68092489158442, - "sentence_nr": 1 + "score": 0.2037012862266554, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.8770669977168674, - "sentence_nr": 1 + "score": 0.06637842065802063, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.9465430749689367, - "sentence_nr": 1 + "score": 0.21920740076602796, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.447394559424499, - "sentence_nr": 1 + "score": 0.1449164009012341, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.7668788405207637, - "sentence_nr": 1 + "score": 0.3465082189188072, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.566119739615098, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.7342868017789353, - "sentence_nr": 1 + "score": 0.013978194191220837, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.1126885377086926, - "sentence_nr": 1 + "score": 0.14269295464324133, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.5312741490187041, - "sentence_nr": 1 + "score": 0.385707323503979, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.1637184523716508, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4161934605653721, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.4106104060507146, - "sentence_nr": 1 + "score": 0.10348510007232478, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.6978219979604, - "sentence_nr": 1 + "score": 0.262284085504438, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.9625248317849852, - "sentence_nr": 1 + "score": 0.2592475459062113, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.9799603794887166, - "sentence_nr": 1 + "score": 0.4431845520770858, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.823872392210652, - "sentence_nr": 1 + "score": 0.10490171797384476, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.9211718482852349, - "sentence_nr": 1 + "score": 0.4224966678718647, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.17236959754271308, - "sentence_nr": 1 + "score": 0.05551337802991313, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17127810877064262, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.17066745219661572, - "sentence_nr": 1 + "score": 0.4177685028472229, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.21087258811486068, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.17964021028231922, - "sentence_nr": 1 + "score": 0.4182975936964002, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.06570335870921905, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.16772440591199625, - "sentence_nr": 1 + "score": 0.2479447435027854, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11560620039242474, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.1761234687731259, - "sentence_nr": 1 + "score": 0.2786890063705584, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.09867858411809763, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.2873490640300303, - "sentence_nr": 1 + "score": 0.3225529873990059, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.09041416838409135, - "sentence_nr": 1 + "score": 0.12370218124034608, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.3890881523525476, - "sentence_nr": 1 + "score": 0.3512013342182375, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.09240248103148029, - "sentence_nr": 1 + "score": 0.22396412213589117, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.40196193955157544, - "sentence_nr": 1 + "score": 0.4759025637784212, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.04294724676074863, - "sentence_nr": 1 + "score": 0.15627978895992434, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.28289709141495645, - "sentence_nr": 1 + "score": 0.32774682416221296, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22220265603625808, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.3264316634099254, - "sentence_nr": 1 + "score": 0.3850042924274655, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.23925860034637106, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.27836308064875176, - "sentence_nr": 1 + "score": 0.4322103094292488, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.10044732284778746, - "sentence_nr": 1 + "score": 0.2340940710746067, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.3430602024503212, - "sentence_nr": 1 + "score": 0.41823616908031946, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.05270938682743268, - "sentence_nr": 1 + "score": 0.12790043463054807, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.31252925174187013, - "sentence_nr": 1 + "score": 0.3251137272879362, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.1075481111616894, - "sentence_nr": 1 + "score": 0.20381173318192514, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.3738814601144911, - "sentence_nr": 1 + "score": 0.4374242147608937, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1863636050757979, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.15334066204940114, - "sentence_nr": 1 + "score": 0.38327347200836553, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.13774451171759725, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.15663428928670334, - "sentence_nr": 1 + "score": 0.3392358805928605, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10995304272182382, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.14942771278303218, - "sentence_nr": 1 + "score": 0.31516290141130093, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.153929566721589, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.16153757205147629, - "sentence_nr": 1 + "score": 0.3608257907466213, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.15592867267790575, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.46677119539598194, - "sentence_nr": 1 + "score": 0.09576088800276077, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.12983585863356562, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.40817978682363515, - "sentence_nr": 1 + "score": 0.3041295470213464, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.09587088737227797, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2616214569856814, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.09302909573829315, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.30560550161855565, - "sentence_nr": 1 + "score": 0.1930745826064226, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22277635660975664, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.05132579587372299, - "sentence_nr": 1 + "score": 0.4138180006822518, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.12683453413302323, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.2912647488684406, - "sentence_nr": 1 + "score": 0.32324923216936663, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10406813778823464, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.1179671428128192, - "sentence_nr": 1 + "score": 0.32304117956922, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11433133814822793, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.35902184995166087, - "sentence_nr": 1 + "score": 0.376685707847211, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16566832287055458, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.29668758510830123, - "sentence_nr": 1 + "score": 0.3899280134984665, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16279177723066465, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.22229988701881476, - "sentence_nr": 1 + "score": 0.4102368807414641, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1418972100477343, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.151253555027421, - "sentence_nr": 1 + "score": 0.35549463565231143, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1340739087751559, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.20433140304565395, - "sentence_nr": 1 + "score": 0.29651992518206227, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11739016262053983, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.3418929341143443, - "sentence_nr": 1 + "score": 0.3243598686623998, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.12391818909971714, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.27032439709340095, - "sentence_nr": 1 + "score": 0.35690404186806807, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.1293238011288526, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4229943657542825, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.08584237196597336, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.36740704811992303, - "sentence_nr": 1 + "score": 0.03625427565378279, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.07306837018670909, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39671144662530883, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.160940602711541, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.17118252592469316, - "sentence_nr": 1 + "score": 0.42923242729167044, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.0529715946034933, - "sentence_nr": 1 + "score": 0.04819270896239086, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.35557346479092056, - "sentence_nr": 1 + "score": 0.262682816076646, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.11764942628273296, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.3447636250916266, - "sentence_nr": 1 + "score": 0.3661118918972216, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1546502365264458, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.36010213387059153, - "sentence_nr": 1 + "score": 0.37662498358584207, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.4082310711203842, - "sentence_nr": 1 + "score": 0.16053664506786586, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.0649353067551241, - "sentence_nr": 1 + "score": 0.05360431433749398, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.40762080099643877, - "sentence_nr": 1 + "score": 0.21840614866685698, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.06197569493404185, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.3338415573583233, - "sentence_nr": 1 + "score": 0.146687038685289, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10180668728147267, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.3967113999971865, - "sentence_nr": 1 + "score": 0.3387919511268085, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.15878190990009447, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.4128330698627673, - "sentence_nr": 1 + "score": 0.4717304128091511, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.0722283716922605, - "sentence_nr": 1 + "score": 0.165417615101112, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.3701322940114362, - "sentence_nr": 1 + "score": 0.3723049467309275, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.14493605424503186, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.35685082927498357, - "sentence_nr": 1 + "score": 0.3501358359880476, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.05664033266477341, - "sentence_nr": 1 + "score": 0.1270407903803617, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.3784614210934525, - "sentence_nr": 1 + "score": 0.3551178453495837, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.08114944360388783, - "sentence_nr": 1 + "score": 0.08711637171502758, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.39320041394379696, - "sentence_nr": 1 + "score": 0.2954967025562741, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1502843121431226, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.1569159469136538, - "sentence_nr": 1 + "score": 0.390370937242866, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2089790450123036, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.14702176025137792, - "sentence_nr": 1 + "score": 0.5020912846079854, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1224795031512657, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.15207203397909086, - "sentence_nr": 1 + "score": 0.29049145213151384, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.08810199114550488, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.150473170651542, - "sentence_nr": 1 + "score": 0.25421876554719885, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1723666822203382, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.15718151788438975, - "sentence_nr": 1 + "score": 0.4164179694728134, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.08174592824469444, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.11334755958850191, - "sentence_nr": 1 + "score": 0.28125186470074287, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10716023124329743, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.15677069289729273, - "sentence_nr": 1 + "score": 0.33305109264677923, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16831858516266504, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.2916166730601614, - "sentence_nr": 1 + "score": 0.44925788158953217, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.06005500834321576, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.2596756015668518, - "sentence_nr": 1 + "score": 0.23500168157276313, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.09532344847561978, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.16085808831478232, - "sentence_nr": 1 + "score": 0.27911273015721655, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1620442038279161, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.03602534230965631, - "sentence_nr": 1 + "score": 0.44671116906860114, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.0808656459604844, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.17980618443164004, - "sentence_nr": 1 + "score": 0.31629942918298065, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 + "score": 0.10362141065231315, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 + "score": 0.2976713869608838, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.3961285597009415, - "sentence_nr": 2 + "score": 0.0535442755496515, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.6148751441350505, - "sentence_nr": 2 + "score": 0.32534949147415587, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 + "score": 0.10502639606076236, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 + "score": 0.3443858948320673, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.3996712647649035, - "sentence_nr": 2 + "score": 0.20843904443769717, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.6353525755760105, - "sentence_nr": 2 + "score": 0.41987041749209575, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.5115346945020283, - "sentence_nr": 2 + "score": 0.16892311117272107, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.7037574715738644, - "sentence_nr": 2 + "score": 0.42944637897474097, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.24715873794308874, - "sentence_nr": 2 + "score": 0.09407649401384535, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.49051792813181655, - "sentence_nr": 2 + "score": 0.24845011725491462, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.6152980280400979, - "sentence_nr": 2 + "score": 0.08853913752009362, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.8311281590297233, - "sentence_nr": 2 + "score": 0.3699120467510799, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.24508104771894088, - "sentence_nr": 2 + "score": 0.10565962599924915, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.5725552336126134, - "sentence_nr": 2 + "score": 0.3576984412786932, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.07750873793282746, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.42519667805364314, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.33608213382072566, - "sentence_nr": 2 + "score": 0.07351837954322755, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.6155314069125684, - "sentence_nr": 2 + "score": 0.16306869473764823, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 + "score": 0.09530685695259108, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.26703508536995574, - "sentence_nr": 2 + "score": 0.324860923353352, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.35315040956049437, - "sentence_nr": 2 + "score": 0.06261022269185519, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.625895188503691, - "sentence_nr": 2 + "score": 0.16543452283536295, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 + "score": 0.10417702105111015, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 + "score": 0.2840081496768262, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.17374951565433233, - "sentence_nr": 2 + "score": 0.22195513186933313, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.45325597884524305, - "sentence_nr": 2 + "score": 0.41825817264037385, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 + "score": 0.10512531898680018, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 + "score": 0.3280524648390448, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 + "score": 0.06549365852999947, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.41580120868053494, - "sentence_nr": 2 + "score": 0.19948256778755252, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.05963579607071745, - "sentence_nr": 2 + "score": 0.33171820256493456, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.31139762378406344, - "sentence_nr": 2 + "score": 0.5317418755857823, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.11346446511593337, - "sentence_nr": 2 + "score": 0.13801294746633136, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.3675317022605926, - "sentence_nr": 2 + "score": 0.3369282304053488, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.2377604053257556, - "sentence_nr": 2 + "score": 0.05045310628409221, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5662768009060447, - "sentence_nr": 2 + "score": 0.26506847806653416, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.10742716472890976, - "sentence_nr": 2 + "score": 0.1177915530693848, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.42694859148910824, - "sentence_nr": 2 + "score": 0.44418786085508993, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.146547094309055, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.36840754531694414, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.19073363590503933, - "sentence_nr": 2 + "score": 0.055310713185595, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.49895382941569383, - "sentence_nr": 2 + "score": 0.25742908684385607, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.20576246677399845, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3789603596807018, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.14745870033404418, - "sentence_nr": 2 + "score": 0.07528967210822063, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.475170637938921, - "sentence_nr": 2 + "score": 0.2659777826374339, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.26459538953931094, - "sentence_nr": 2 + "score": 0.17509809383946048, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.5272178908335121, - "sentence_nr": 2 + "score": 0.386834770913833, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.26801022984888695, - "sentence_nr": 2 + "score": 0.16322400014183205, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.5654883864995515, - "sentence_nr": 2 + "score": 0.47216572692709596, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 + "score": 0.13050295514132168, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 + "score": 0.37331190108047335, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.2735429726790281, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.5644723203818537, - "sentence_nr": 2 + "score": 0.05116952807627418, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.20223322445648179, - "sentence_nr": 2 + "score": 0.10314036721569257, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.5084057058209687, - "sentence_nr": 2 + "score": 0.24963395554422432, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.012201453805310429, - "sentence_nr": 2 + "score": 0.043321633865040066, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.063050817196087, - "sentence_nr": 2 + "score": 0.1541835268049213, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2868985878682555, - "sentence_nr": 2 + "score": 0.13410301071131794, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5832214090256616, - "sentence_nr": 2 + "score": 0.3942932268034351, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.25848476545940924, - "sentence_nr": 2 + "score": 0.598931508663349, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5525933856866961, - "sentence_nr": 2 + "score": 0.7353063745802827, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.13019082899297843, - "sentence_nr": 2 + "score": 0.30677064886592076, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.40512126305429846, - "sentence_nr": 2 + "score": 0.5308555945242818, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.24071298960902482, - "sentence_nr": 2 + "score": 0.17328174803055044, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5438509851618877, - "sentence_nr": 2 + "score": 0.3178268797869574, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2063890416514164, - "sentence_nr": 2 + "score": 0.4262221594184117, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.38567678850872256, - "sentence_nr": 2 + "score": 0.5886657414856064, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3295566054952435, - "sentence_nr": 2 + "score": 0.286608441075188, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5816133441895466, - "sentence_nr": 2 + "score": 0.4579283646292802, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3563758622144919, - "sentence_nr": 2 + "score": 0.3315037521841549, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6037023613177924, - "sentence_nr": 2 + "score": 0.468197879470805, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3574583793293068, - "sentence_nr": 2 + "score": 0.43186481103649477, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5924115119819969, - "sentence_nr": 2 + "score": 0.5792139686527714, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 + "score": 0.30890092021323623, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6464467277069994, - "sentence_nr": 2 + "score": 0.5553909583113487, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.4206507730319955, - "sentence_nr": 2 + "score": 0.22218130727359342, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.678851303587664, - "sentence_nr": 2 + "score": 0.39929356245904674, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.35367180741660353, - "sentence_nr": 2 + "score": 0.46092611919700416, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.6344846206551544, - "sentence_nr": 2 + "score": 0.6365915338629015, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.2632018059331281, - "sentence_nr": 2 + "score": 0.1690979933029136, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.501302719796297, - "sentence_nr": 2 + "score": 0.3751861276375209, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.45286243450930924, - "sentence_nr": 2 + "score": 0.26538706048179084, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.652736896100196, - "sentence_nr": 2 + "score": 0.4982627378595717, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.20586736678432452, - "sentence_nr": 2 + "score": 0.5234484809182233, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5693016623172978, - "sentence_nr": 2 + "score": 0.6658297773613274, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.27075075499555246, - "sentence_nr": 2 + "score": 0.2840563956846642, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5374328610523021, - "sentence_nr": 2 + "score": 0.5110250591004448, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.1455399826828606, - "sentence_nr": 2 + "score": 0.15161074985415177, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4504825146558032, - "sentence_nr": 2 + "score": 0.3796830006266126, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.175866555062937, - "sentence_nr": 2 + "score": 0.3815250264738168, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.2758687846643748, - "sentence_nr": 2 + "score": 0.6516314751979607, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.2948978498692003, - "sentence_nr": 2 + "score": 0.19920413481788912, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5529453973837751, - "sentence_nr": 2 + "score": 0.42537796926163113, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2158914621804855, - "sentence_nr": 2 + "score": 0.18679710353734788, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5448184155666022, - "sentence_nr": 2 + "score": 0.3876457319870774, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.22292726306270316, - "sentence_nr": 2 + "score": 0.40003810431098236, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5653789747970112, - "sentence_nr": 2 + "score": 0.5899097408105687, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 + "score": 0.20401796878756984, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 + "score": 0.43317630453631556, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.26930937054323245, - "sentence_nr": 2 + "score": 0.21812881407613688, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5410704185827219, - "sentence_nr": 2 + "score": 0.3598346059855135, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.24634765861867908, - "sentence_nr": 2 + "score": 0.38047531731529327, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.55968513851572, - "sentence_nr": 2 + "score": 0.49485723102957346, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2044887070217883, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.39152357647177133, - "sentence_nr": 2 + "score": 0.38471585132587544, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3608351361947582, - "sentence_nr": 2 + "score": 0.24586918158076287, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6215415104007418, - "sentence_nr": 2 + "score": 0.4658595745396681, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.08175340974854195, - "sentence_nr": 2 + "score": 0.4489235959690452, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4308342322390109, - "sentence_nr": 2 + "score": 0.5934678825154104, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2980504190448601, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.3185785286756486, - "sentence_nr": 2 + "score": 0.5101268920225042, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.3268233487541633, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6084114123608597, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.46832763312452297, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.04759937639788563, - "sentence_nr": 2 + "score": 0.8176110134774669, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.2500653935141143, - "sentence_nr": 2 + "score": 0.9436043261706615, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.585528867886047, - "sentence_nr": 2 + "score": 0.9880191679951993, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.18031307339768174, - "sentence_nr": 2 + "score": 0.21039673882735752, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.522164454804456, - "sentence_nr": 2 + "score": 0.3872019296036794, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.21403222128228389, - "sentence_nr": 2 + "score": 0.3642482472579296, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.563121432204311, - "sentence_nr": 2 + "score": 0.5342538783335161, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 + "score": 0.18831933500600306, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 + "score": 0.4318025704181776, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.18505378795140082, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.47051087423292237, - "sentence_nr": 2 + "score": 0.3395693620772222, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.09807167131529582, - "sentence_nr": 2 + "score": 0.28406136898728457, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.4646043403137081, - "sentence_nr": 2 + "score": 0.5649283064490618, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.037874984245935134, - "sentence_nr": 2 + "score": 0.21544027588567594, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.13083094614009624, - "sentence_nr": 2 + "score": 0.5040038440508637, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.3522044895579804, - "sentence_nr": 2 + "score": 0.22556860731509948, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.626995375818154, - "sentence_nr": 2 + "score": 0.4747086049005634, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.20064110494011925, - "sentence_nr": 2 + "score": 0.23843418577408987, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5205761630334527, - "sentence_nr": 2 + "score": 0.4082320855803597, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.0684792839692368, - "sentence_nr": 2 + "score": 0.26970223719007375, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.3138898863773231, - "sentence_nr": 2 + "score": 0.5172978597562362, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.1573857459340795, - "sentence_nr": 2 + "score": 0.25449674462950855, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.5347526444819753, - "sentence_nr": 2 + "score": 0.44805409822643144, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.008180069062416927, - "sentence_nr": 2 + "score": 0.49458876622696707, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.04605877529742035, - "sentence_nr": 2 + "score": 0.6179893617801274, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.13904320686250593, - "sentence_nr": 2 + "score": 0.30630098078522544, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.47169365083525167, - "sentence_nr": 2 + "score": 0.5439056051092116, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.2999092588227898, - "sentence_nr": 2 + "score": 0.3312570339636223, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5505916495384416, - "sentence_nr": 2 + "score": 0.45442661484375735, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.4054983797456263, - "sentence_nr": 2 + "score": 0.41520313827696, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6264774230839022, - "sentence_nr": 2 + "score": 0.6485212540886613, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 + "score": 0.19850842371858787, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 + "score": 0.43584341835040474, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.43870712112271204, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6525926696001584, - "sentence_nr": 2 - }, + "score": 0.22744906705116497, + "sentence_nr": 8 + }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4207445490015154, - "sentence_nr": 2 + "score": 0.37233302529431345, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6496192656497308, - "sentence_nr": 2 + "score": 0.5112697990822607, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2772655014585435, - "sentence_nr": 2 + "score": 0.20170335119323748, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4799723286048352, - "sentence_nr": 2 + "score": 0.3541251997977811, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4577065720375266, - "sentence_nr": 2 + "score": 0.16542259679471108, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6729985527773988, - "sentence_nr": 2 + "score": 0.26975832150444645, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2516768028374535, - "sentence_nr": 2 + "score": 0.3857901147929391, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.49572209766846287, - "sentence_nr": 2 + "score": 0.4926358895461277, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2958351954606211, - "sentence_nr": 2 + "score": 0.10508106635796587, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5202221091638364, - "sentence_nr": 2 + "score": 0.3182774828667731, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3471036105446511, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5663019495273462, - "sentence_nr": 2 + "score": 0.2831988281847858, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.009070964338765818, - "sentence_nr": 2 + "score": 0.16885023000999705, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.06852404470758497, - "sentence_nr": 2 + "score": 0.3897135623573608, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3212066202235163, - "sentence_nr": 2 + "score": 0.2309552734743087, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5836558214123343, - "sentence_nr": 2 + "score": 0.43975656978777905, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.21877512875558908, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4263684749347053, - "sentence_nr": 2 + "score": 0.46843499948065653, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.20051119758906127, - "sentence_nr": 2 + "score": 0.3325026294099889, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5334791309401924, - "sentence_nr": 2 + "score": 0.45805631044287126, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 + "score": 0.23530033724858213, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 + "score": 0.46208607300298377, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.23724642034775328, - "sentence_nr": 2 + "score": 0.1759696284842668, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5175129869169551, - "sentence_nr": 2 + "score": 0.40989005404786566, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.1849419409628554, - "sentence_nr": 2 + "score": 0.4824794737945071, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5067677916637257, - "sentence_nr": 2 + "score": 0.6479459161283603, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.37284027455688556, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4228574070038002, - "sentence_nr": 2 + "score": 0.5528347504734102, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.2737513622788043, - "sentence_nr": 2 + "score": 0.14722675403683808, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5787848381439354, - "sentence_nr": 2 + "score": 0.28846790344058515, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.11126509848873964, - "sentence_nr": 2 + "score": 0.4656455050518963, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4338923576538663, - "sentence_nr": 2 + "score": 0.5477103600632085, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2887308472548599, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.31311320826536454, - "sentence_nr": 2 + "score": 0.38846174119508314, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.17236491061326006, - "sentence_nr": 2 + "score": 0.2542828011834812, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5548663878579595, - "sentence_nr": 2 + "score": 0.4695941026465371, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.485644095022506, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.3540506408782035, - "sentence_nr": 2 + "score": 0.5923993450097689, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.08906092883748383, - "sentence_nr": 2 + "score": 0.15487293534817623, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.4317746285352776, - "sentence_nr": 2 + "score": 0.39293494862736383, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.2562849004088193, - "sentence_nr": 2 + "score": 0.19659284558894802, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5767019342009202, - "sentence_nr": 2 + "score": 0.3439604955527307, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3535002370419364, - "sentence_nr": 2 + "score": 0.3150760288937462, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5959879218348465, - "sentence_nr": 2 + "score": 0.4551575101683354, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.21741853044139284, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.3535910166292039, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.5046613014990851, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.6377969619576389, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.38333108639273095, - "sentence_nr": 2 + "score": 0.6485902560215636, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6252821653079126, - "sentence_nr": 2 + "score": 0.7627201392474565, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.19851743023355672, - "sentence_nr": 2 + "score": 0.33626819961829335, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.49793621556542356, - "sentence_nr": 2 + "score": 0.5466581859383387, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.3878152533224771, - "sentence_nr": 2 + "score": 0.20326213373677707, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6512757512743351, - "sentence_nr": 2 + "score": 0.4211311971791892, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.10954782904363085, - "sentence_nr": 2 + "score": 0.38713346831820944, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5090382887002297, - "sentence_nr": 2 + "score": 0.5376493568188783, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.1614809742656655, - "sentence_nr": 2 + "score": 0.32000331642122953, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4145218112165384, - "sentence_nr": 2 + "score": 0.5480591855923784, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.2834484329788497, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5201572704778937, - "sentence_nr": 2 + "score": 0.12890680068769322, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.006569332862878646, - "sentence_nr": 2 + "score": 0.2478022357548686, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.060864196135666904, - "sentence_nr": 2 + "score": 0.49124012500448727, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.2756885721075884, - "sentence_nr": 2 + "score": 0.21132630077912357, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5867077870431389, - "sentence_nr": 2 + "score": 0.4175670766052166, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2465888500427759, - "sentence_nr": 2 + "score": 0.14440270272056518, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5221084445696768, - "sentence_nr": 2 + "score": 0.3829771215415724, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.35983766090218355, - "sentence_nr": 2 + "score": 0.5543498698280007, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5862251404739759, - "sentence_nr": 2 + "score": 0.7016802877815009, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 + "score": 0.13108369255325433, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 + "score": 0.3929302741911199, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3563982585943877, - "sentence_nr": 2 + "score": 0.1667112120846934, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5378970484635915, - "sentence_nr": 2 + "score": 0.30913125513655043, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.1510722413165652, - "sentence_nr": 2 + "score": 0.13203823352287472, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.43592329727028295, - "sentence_nr": 2 + "score": 0.28280767234695003, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.13240628161243978, - "sentence_nr": 2 + "score": 0.20174045447955946, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.3347576434758551, - "sentence_nr": 2 + "score": 0.33729298835089516, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.45865525158396653, - "sentence_nr": 2 + "score": 0.20455275179869584, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6547109311136894, - "sentence_nr": 2 + "score": 0.47815735761186096, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.31372333533981844, - "sentence_nr": 2 + "score": 0.37420316460821246, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5741396495481692, - "sentence_nr": 2 + "score": 0.6481907872475802, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.10878661088699644, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4101850467281004, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4583603882613907, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.671355324267905, - "sentence_nr": 2 + "score": 0.2280725846401638, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.10077062063331403, - "sentence_nr": 2 + "score": 0.32365707034585395, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.09760482860544632, - "sentence_nr": 2 + "score": 0.4584149294578286, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.2600884210903425, - "sentence_nr": 2 + "score": 0.13894362470892055, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.531430106996609, - "sentence_nr": 2 + "score": 0.35710461878741834, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.22150370805587954, - "sentence_nr": 2 + "score": 0.18945235333331134, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5463488388082953, - "sentence_nr": 2 + "score": 0.3443950901432381, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2971752224486841, - "sentence_nr": 2 + "score": 0.26637160423927314, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.605133664481872, - "sentence_nr": 2 + "score": 0.4646735366228476, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2329856851831642, - "sentence_nr": 2 + "score": 0.19585063466021865, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5405751250637106, - "sentence_nr": 2 + "score": 0.37259242197674974, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2563564295134795, - "sentence_nr": 2 + "score": 0.1581734375963556, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5499025328773104, - "sentence_nr": 2 + "score": 0.29855796123914957, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2500653935141143, - "sentence_nr": 2 + "score": 0.41749084544527715, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5098952451698188, - "sentence_nr": 2 + "score": 0.5512835123193889, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.14574402656519908, - "sentence_nr": 2 + "score": 0.2661644792674617, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4275543759804943, - "sentence_nr": 2 + "score": 0.46546633658762687, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3410035628829697, - "sentence_nr": 2 + "score": 0.16832254701348195, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6673519558586546, - "sentence_nr": 2 + "score": 0.34172735320777375, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.24750028117795922, - "sentence_nr": 2 + "score": 0.39245477087067665, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5894646098566614, - "sentence_nr": 2 + "score": 0.5302932215753233, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.17171159782066198, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4250539223489113, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2687379663485886, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5826619907747026, - "sentence_nr": 2 + "score": 0.1567864814929981, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.15897333608001968, - "sentence_nr": 2 + "score": 0.29896910170538116, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4496168003395693, - "sentence_nr": 2 + "score": 0.41423657620328247, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.15089318423122544, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4042478943311393, - "sentence_nr": 2 + "score": 0.1524830877429947, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.39461811323775403, - "sentence_nr": 2 + "score": 0.4102495208939548, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5655204109921267, - "sentence_nr": 2 + "score": 0.6896260480312464, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2786312783602775, - "sentence_nr": 2 + "score": 0.47475604110292025, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.4836796407825139, - "sentence_nr": 2 + "score": 0.6309823582859546, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.41756686236967944, - "sentence_nr": 2 + "score": 0.2685172542195998, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5616829345739638, - "sentence_nr": 2 + "score": 0.5385038880406502, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.4205004825822372, - "sentence_nr": 2 + "score": 0.23511486401816076, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5674537639314233, - "sentence_nr": 2 + "score": 0.4879376273715227, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.40974323819644953, - "sentence_nr": 2 + "score": 0.46676620029268096, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5368112087257564, - "sentence_nr": 2 + "score": 0.5960578086281888, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.219672574669477, - "sentence_nr": 2 + "score": 0.21011665246752942, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.37413906702142435, - "sentence_nr": 2 + "score": 0.5022122132107005, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.45613731449823464, - "sentence_nr": 2 + "score": 0.23892317781280908, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.6655742412177843, - "sentence_nr": 2 + "score": 0.38939729384213495, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.3116520879159789, - "sentence_nr": 2 + "score": 0.3481041677117235, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.47517792402030584, - "sentence_nr": 2 + "score": 0.5683248500873576, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.2931120472907597, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5551374467086605, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.3627923367798331, - "sentence_nr": 2 + "score": 0.0935071820875984, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5255399246733422, - "sentence_nr": 2 + "score": 0.3104146584296793, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.12870376210497989, - "sentence_nr": 2 + "score": 0.46742290550462806, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.22151451171035633, - "sentence_nr": 2 + "score": 0.6201011845566285, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.30181468526956173, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5261802780475523, - "sentence_nr": 2 + "score": 0.312650266868888, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.25564177137418986, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.49870011615602194, - "sentence_nr": 2 + "score": 0.127245399039237, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.39579112101105834, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.6431490866428237, - "sentence_nr": 2 + "score": 0.08385867792924734, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.38189567401226293, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.6154314825900052, - "sentence_nr": 2 + "score": 0.1131435241082435, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.3436153961225413, - "sentence_nr": 2 + "score": 0.3955646912745367, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5954254642696512, - "sentence_nr": 2 + "score": 0.5363251034746059, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.4339219137216798, - "sentence_nr": 2 + "score": 0.26460159523593296, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.6338401824373191, - "sentence_nr": 2 + "score": 0.4803700055675181, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.27946415227589155, - "sentence_nr": 2 + "score": 0.12561164303321054, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.4938296655037709, - "sentence_nr": 2 + "score": 0.47038542160135094, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.5621669807462487, - "sentence_nr": 2 + "score": 0.12748893606628386, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.7520069792061377, - "sentence_nr": 2 + "score": 0.3557730263992019, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.26925601229087914, - "sentence_nr": 2 + "score": 0.4174381195316011, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.6050395148484196, - "sentence_nr": 2 + "score": 0.5467874115748124, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.27336087678628246, - "sentence_nr": 2 + "score": 0.18187597339521155, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.4919779927233182, - "sentence_nr": 2 + "score": 0.47371700399657607, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.43235877156651625, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.614485867381761, - "sentence_nr": 2 + "score": 0.18399961762411743, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.012951112459987979, - "sentence_nr": 2 + "score": 0.24762685010055863, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.11882277038397698, - "sentence_nr": 2 + "score": 0.380770036603192, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.3272963527043486, - "sentence_nr": 2 + "score": 0.2372622545962587, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.5715613564297359, - "sentence_nr": 2 + "score": 0.41017604192890195, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.15985840708020788, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.44951053332729884, - "sentence_nr": 2 + "score": 0.15798724534926178, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.35253338922743144, - "sentence_nr": 2 + "score": 0.09458362068147118, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.6487975154557831, - "sentence_nr": 2 + "score": 0.19112709920459806, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.2126707920684064, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.4659908460634765, - "sentence_nr": 2 + "score": 0.26917712505386043, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.2914880531303981, - "sentence_nr": 2 + "score": 0.11450137919698138, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.5962886968213414, - "sentence_nr": 2 + "score": 0.38588319985262204, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.23944666570758283, - "sentence_nr": 2 + "score": 0.2529258575884984, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.5106509239874657, - "sentence_nr": 2 + "score": 0.43636064973584276, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.17920531400657588, - "sentence_nr": 2 + "score": 0.2206598690689583, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.4522763055702811, - "sentence_nr": 2 + "score": 0.4671100153557676, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.356290219128095, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.669942335348411, - "sentence_nr": 2 + "score": 0.3284807347106534, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.26199400535088346, - "sentence_nr": 2 + "score": 0.19276506991327308, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.5804827870380099, - "sentence_nr": 2 + "score": 0.43458455506290555, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.13442725522288548, - "sentence_nr": 2 + "score": 0.28127656729267564, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.39067504005337655, - "sentence_nr": 2 + "score": 0.48992770341073005, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.20533250289138671, - "sentence_nr": 2 + "score": 0.2584188371476316, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.49681810344665644, - "sentence_nr": 2 + "score": 0.37301467093894103, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.0950330051810703, - "sentence_nr": 2 + "score": 0.4094746713677566, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.2535554509913635, - "sentence_nr": 2 + "score": 0.5127629442173787, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.20378172261136207, - "sentence_nr": 2 + "score": 0.1978740176644931, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.48600008237332104, - "sentence_nr": 2 + "score": 0.3475150762120378, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.27217589854489177, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.5756343666825848, - "sentence_nr": 2 + "score": 0.12525435825547931, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.24513414885202045, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.5476647609559218, - "sentence_nr": 2 + "score": 0.2209612814301191, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.4973274282641141, - "sentence_nr": 2 + "score": 0.11872519045542135, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.2516768028374535, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.47249781871556595, - "sentence_nr": 2 + "score": 0.1415901806866318, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.16105265992626083, - "sentence_nr": 2 + "score": 0.12467595443249284, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.404377371664668, - "sentence_nr": 2 + "score": 0.2225814067024197, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.15813859795767055, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.44607340294350173, - "sentence_nr": 2 + "score": 0.1386200663966604, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.35253338922743144, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.6286864313376063, - "sentence_nr": 2 + "score": 0.1507724411705623, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.20533250289138671, - "sentence_nr": 2 + "score": 0.3882065230979266, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.45975635079501215, - "sentence_nr": 2 + "score": 0.5170531810333934, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.0698714799763323, - "sentence_nr": 2 + "score": 0.27850162207652013, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.18217918401705574, - "sentence_nr": 2 + "score": 0.4893867900242687, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.3077422016953529, - "sentence_nr": 2 + "score": 0.2686424829558855, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5943673820353285, - "sentence_nr": 2 + "score": 0.3998516011592878, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.09564571510780719, - "sentence_nr": 2 + "score": 0.7037873295747725, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.1907009110214351, - "sentence_nr": 2 + "score": 0.7990268043083656, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.10353153556093725, - "sentence_nr": 2 + "score": 0.36484904083194636, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.40215410362634535, - "sentence_nr": 2 + "score": 0.5860311918255953, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.28467215304840787, - "sentence_nr": 2 + "score": 0.14220602298368126, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4298052820106505, - "sentence_nr": 2 + "score": 0.39434634170155597, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.28653528640783255, - "sentence_nr": 2 + "score": 0.17627564495170006, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.5053636612097852, - "sentence_nr": 2 + "score": 0.2829970218265084, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.17979384730979156, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.4177311931467539, - "sentence_nr": 2 + "score": 0.1526113403954924, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.2767429728676341, - "sentence_nr": 2 + "score": 0.24044159635524803, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.51183750216717, - "sentence_nr": 2 + "score": 0.4467052322987113, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.3192837057100497, - "sentence_nr": 2 + "score": 0.4269094075482239, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.4886865884781344, - "sentence_nr": 2 + "score": 0.6440338986542221, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.19358934025667454, - "sentence_nr": 2 + "score": 0.2089934379295256, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.3468163065453778, - "sentence_nr": 2 + "score": 0.4202179280810887, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.35728152609132297, - "sentence_nr": 2 + "score": 0.23927776234174902, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.6216765378447047, - "sentence_nr": 2 + "score": 0.42492275577244626, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.24328450115124742, - "sentence_nr": 2 + "score": 0.4192565023565104, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.42756232255111404, - "sentence_nr": 2 + "score": 0.47988160867636526, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.2825074232826454, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4576805072760733, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.383259260976792, - "sentence_nr": 2 + "score": 0.24073557586211028, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5920204217586964, - "sentence_nr": 2 + "score": 0.42665885117139607, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.19180992590551618, - "sentence_nr": 2 + "score": 0.37356506757316704, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.3308002143045663, - "sentence_nr": 2 + "score": 0.5184539879286838, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.22305706065076847, - "sentence_nr": 2 + "score": 0.26105773506180324, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5202782711775973, - "sentence_nr": 2 + "score": 0.4938667045198544, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.15426765225005337, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.45289625960131974, - "sentence_nr": 2 + "score": 0.29977450507928116, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.31326946419057006, - "sentence_nr": 2 + "score": 0.33448899741633614, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.591171976889058, - "sentence_nr": 2 + "score": 0.4659921217718883, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.1702602472176709, - "sentence_nr": 2 + "score": 0.08369850049073722, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.4366640707779677, - "sentence_nr": 2 + "score": 0.2901642042976994, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.2615659486493292, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.5126931977939474, - "sentence_nr": 2 + "score": 0.15901023657267704, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.1840585956032067, - "sentence_nr": 2 + "score": 0.08048862002869049, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.5282240694221736, - "sentence_nr": 2 + "score": 0.1853605878762701, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.004770195810675918, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.03746566558042944, - "sentence_nr": 2 + "score": 0.14911046740590622, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.35728152609132297, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.6309384943070174, - "sentence_nr": 2 + "score": 0.31528213773035774, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 + "score": 0.19951297936500814, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.5335795368341105, - "sentence_nr": 2 + "score": 0.3268993135281582, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.41520266266344963, - "sentence_nr": 2 + "score": 0.17837945138789355, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.31487248334376844, - "sentence_nr": 2 + "score": 0.23465398368701498, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.5905881236136059, - "sentence_nr": 2 + "score": 0.45367638954943146, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.19472361650315084, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.03509992242758199, - "sentence_nr": 2 + "score": 0.40377628308461305, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.30204473943342836, - "sentence_nr": 2 + "score": 0.36966653028794927, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.5394635390078353, - "sentence_nr": 2 + "score": 0.619000557599158, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.7096224667917136, - "sentence_nr": 2 + "score": 0.14978817967886865, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.8862932371217843, - "sentence_nr": 2 + "score": 0.3579627976703376, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.5294442646627652, - "sentence_nr": 2 + "score": 0.3428955163829333, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.7281375072835307, - "sentence_nr": 2 + "score": 0.4549331138881434, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.933651069586263, - "sentence_nr": 2 + "score": 0.19374128629783371, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.9586507529693243, - "sentence_nr": 2 + "score": 0.42300747804792893, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.933651069586263, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.9586507529693243, - "sentence_nr": 2 + "score": 0.19196608410425278, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.6337520241233826, - "sentence_nr": 2 + "score": 0.4492698041709823, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.7734740773636255, - "sentence_nr": 2 + "score": 0.529528271776728, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.882190724997149, - "sentence_nr": 2 + "score": 0.1768429537093963, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.943123392401343, - "sentence_nr": 2 + "score": 0.39729894405642474, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.5842771441222545, - "sentence_nr": 2 + "score": 0.1783827232160263, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.7733784586326149, - "sentence_nr": 2 + "score": 0.420993901006138, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.3359230828063256, - "sentence_nr": 2 + "score": 0.5471998982127312, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.573086119969458, - "sentence_nr": 2 + "score": 0.7261143787285806, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.12531520484413727, - "sentence_nr": 2 + "score": 0.28098066731217336, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.3550670027779894, - "sentence_nr": 2 + "score": 0.5947928282659879, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.7106361351765512, - "sentence_nr": 2 + "score": 0.3471867713780383, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.8390104107504974, - "sentence_nr": 2 + "score": 0.43403373030619585, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.8627586293513119, - "sentence_nr": 2 + "score": 0.33731180652769377, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.8964369716535558, - "sentence_nr": 2 + "score": 0.43284789974371823, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.9082489095559809, - "sentence_nr": 2 + "score": 0.25107542027773755, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.9677853954871374, - "sentence_nr": 2 + "score": 0.4717531424710489, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.33713757310040376, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.5731908178757754, - "sentence_nr": 2 + "score": 0.12765417324195955, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.4162915990459618, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.5970097205621886, - "sentence_nr": 2 + "score": 0.10215316399947212, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.3816408219023713, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.5784105768028126, - "sentence_nr": 2 + "score": 0.11108510046198607, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.39234342738825634, - "sentence_nr": 2 + "score": 0.22669018884314224, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.5972186089812455, - "sentence_nr": 2 + "score": 0.44041899547143254, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.361250819353898, - "sentence_nr": 2 + "score": 0.1701094120440135, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.6105988260114965, - "sentence_nr": 2 + "score": 0.29286151337006916, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.20134984470993175, - "sentence_nr": 2 + "score": 0.2927933973558131, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.47593263188822477, - "sentence_nr": 2 + "score": 0.47375496223846336, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.45319466339683195, - "sentence_nr": 2 + "score": 0.06874614919231697, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.7044338153663002, - "sentence_nr": 2 + "score": 0.20986932076566409, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.18236198178601878, - "sentence_nr": 2 + "score": 0.1441116002025322, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.4546828651423093, - "sentence_nr": 2 + "score": 0.23046678878134805, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.18415822638260726, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4239216335535842, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.4710304492059704, - "sentence_nr": 2 + "score": 0.2916607130801699, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.6951644805792349, - "sentence_nr": 2 + "score": 0.530090321409246, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.20786721468392394, - "sentence_nr": 2 + "score": 0.5344974294085829, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.22772581789894308, - "sentence_nr": 2 + "score": 0.6840734337993466, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.36463597249757107, - "sentence_nr": 2 + "score": 0.24006100607956476, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.5937597266341832, - "sentence_nr": 2 + "score": 0.525627718929817, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.19129143021561437, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.390473445537339, - "sentence_nr": 2 + "score": 0.3180137236294719, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.25848476545940924, - "sentence_nr": 2 + "score": 0.485644095022506, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.4897308313348651, - "sentence_nr": 2 + "score": 0.5954238368250169, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.18398226639192106, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.37285010531146734, - "sentence_nr": 2 + "score": 0.3762634236591465, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.18116830735735984, - "sentence_nr": 2 + "score": 0.33384668064796064, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.378028436956142, - "sentence_nr": 2 + "score": 0.4509736001471859, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.2539169853234758, - "sentence_nr": 2 + "score": 0.27392978689521524, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.4294871148542432, - "sentence_nr": 2 + "score": 0.4257914116040595, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.12731505388718733, - "sentence_nr": 2 + "score": 0.17712030567063494, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.3354785767663773, - "sentence_nr": 2 + "score": 0.3607125350126197, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.4055828482909762, - "sentence_nr": 2 + "score": 0.20135763034646928, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.6637253055098513, - "sentence_nr": 2 + "score": 0.35006080282921004, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.2541277777982055, - "sentence_nr": 2 + "score": 0.37565846334638286, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4083801144711196, - "sentence_nr": 2 + "score": 0.45365001576190844, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.1350785139238714, - "sentence_nr": 2 + "score": 0.25087428990920285, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.3131061676292571, - "sentence_nr": 2 + "score": 0.4056641749589937, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.24814514148154546, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.5127275550938416, - "sentence_nr": 2 + "score": 0.1529665009156386, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.0440854328121759, - "sentence_nr": 2 + "score": 0.15471428129658021, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.053628247089794495, - "sentence_nr": 2 + "score": 0.23308330456083767, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.2500984051960647, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.510079353939284, - "sentence_nr": 2 + "score": 0.19252575667755267, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.27057949011516347, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5644281635271426, - "sentence_nr": 2 + "score": 0.18545347920261077, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.29851690541541476, - "sentence_nr": 2 + "score": 0.3552926519343793, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.6224209860013706, - "sentence_nr": 2 + "score": 0.5830807494515953, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.26958884543190903, - "sentence_nr": 2 + "score": 0.2440425026082214, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5631664732610485, - "sentence_nr": 2 + "score": 0.4841963273421365, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.2503955135641583, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.5411247834284307, - "sentence_nr": 2 + "score": 0.12514106545496687, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.26115021337737276, - "sentence_nr": 2 - }, + "score": 0.07142565875275515, + "sentence_nr": 8 + }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.5520240492306279, - "sentence_nr": 2 + "score": 0.156820371591221, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.1676495122493199, - "sentence_nr": 2 + "score": 0.34038446123808824, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.36959105080753546, - "sentence_nr": 2 + "score": 0.5113782796801761, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.38364238388665217, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.6777989738947793, - "sentence_nr": 2 + "score": 0.1423117641326302, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.19300403619224038, - "sentence_nr": 2 + "score": 0.09621288561909111, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.572822450824776, - "sentence_nr": 2 + "score": 0.19434864892148712, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.20789192174660942, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.40675321225858063, - "sentence_nr": 2 + "score": 0.15449597020263703, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.2799103318440567, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.6172633503183605, - "sentence_nr": 2 + "score": 0.006107696271654331, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.15492402556203205, - "sentence_nr": 2 + "score": 0.38082873348976415, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.3125746694462918, - "sentence_nr": 2 + "score": 0.5493718467604206, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.23233851806966574, - "sentence_nr": 2 + "score": 0.36481383830535447, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5357993047462365, - "sentence_nr": 2 + "score": 0.5676950665117104, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.24285172240675165, - "sentence_nr": 2 + "score": 0.19951297936500814, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.4655392375590772, - "sentence_nr": 2 + "score": 0.30595434646385716, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.29796912700911177, - "sentence_nr": 2 + "score": 0.3491726680217181, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.5158892363484622, - "sentence_nr": 2 + "score": 0.49174493737704345, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.4005296397635166, - "sentence_nr": 2 + "score": 0.20326213373677707, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.6201785376974677, - "sentence_nr": 2 + "score": 0.3965155400704921, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.4005296397635166, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.591086403119955, - "sentence_nr": 2 + "score": 0.14220836651767108, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.3114493863658917, - "sentence_nr": 2 + "score": 0.19783691883509516, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5682352098535879, - "sentence_nr": 2 + "score": 0.35420217205091864, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.008072417039197614, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.06388295238713035, - "sentence_nr": 2 + "score": 0.14287380394182542, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.3990867885395787, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.630492990358084, - "sentence_nr": 2 + "score": 0.14713433937849357, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.19851743023355672, - "sentence_nr": 2 + "score": 0.23515811334479123, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.547468247371695, - "sentence_nr": 2 + "score": 0.41733625901113247, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2550611509722341, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.341534333789316, - "sentence_nr": 2 + "score": 0.36821398145189993, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.5541872808406454, - "sentence_nr": 2 + "score": 0.6015510626637584, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.12031041493621579, - "sentence_nr": 2 + "score": 0.22008558571360354, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.2540681992986826, - "sentence_nr": 2 + "score": 0.4407629091157834, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.21910942711629067, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.4572726847360035, - "sentence_nr": 2 + "score": 0.4292296939530436, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.07276375309803214, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.38861839385008856, - "sentence_nr": 2 + "score": 0.1349922772392652, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.17377261603583774, - "sentence_nr": 2 + "score": 0.1381958549483014, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.4342710497791623, - "sentence_nr": 2 + "score": 0.26422743815167654, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.15956483578595942, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.425693420655628, - "sentence_nr": 2 + "score": 0.11714319535206957, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.09791579531860735, - "sentence_nr": 2 + "score": 0.20972571494011877, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.40432986440529917, - "sentence_nr": 2 + "score": 0.395894071208527, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.10423563468216913, - "sentence_nr": 2 + "score": 0.18559542135951204, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.34390006822858976, - "sentence_nr": 2 + "score": 0.3804842882867387, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.11125382292406938, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.36362589237918785, - "sentence_nr": 2 + "score": 0.35369375385786006, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.26330018250960563, - "sentence_nr": 2 + "score": 0.12475846123062707, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5604741582178225, - "sentence_nr": 2 + "score": 0.27823340731817514, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.16285656455323885, - "sentence_nr": 2 + "score": 0.15122189206102096, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.41845720590580077, - "sentence_nr": 2 + "score": 0.26750110507308866, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.10759927692349745, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.21065794536310511, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.1752818941059842, - "sentence_nr": 2 + "score": 0.30327872414714485, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.4632619281274353, - "sentence_nr": 2 + "score": 0.49804213541579834, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.21685485833927476, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.06150895863726817, - "sentence_nr": 2 + "score": 0.3714219747170047, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.20484322521277584, - "sentence_nr": 2 + "score": 0.12274092982883021, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.4996688420059074, - "sentence_nr": 2 + "score": 0.3385513651938691, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.35205535634937346, - "sentence_nr": 2 + "score": 0.12787395553510186, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5769772651090223, - "sentence_nr": 2 + "score": 0.33752742535974617, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.33631398011857205, - "sentence_nr": 2 + "score": 0.112289032173749, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6332428715049205, - "sentence_nr": 2 + "score": 0.17726100052085036, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.2323385180696658, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5019509292309764, - "sentence_nr": 2 + "score": 0.31017716089889963, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.19726472415983368, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5084335265908847, - "sentence_nr": 2 + "score": 0.39962545473912425, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.22897967367089514, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5158963534678644, - "sentence_nr": 2 + "score": 0.3710595252626966, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.196046355324564, - "sentence_nr": 2 + "score": 0.21688283061839067, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.43350415347352517, - "sentence_nr": 2 + "score": 0.41775824162589076, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3234043476847562, - "sentence_nr": 2 + "score": 0.14326513489612383, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5909130619522283, - "sentence_nr": 2 + "score": 0.4034278533385552, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.19057582910221915, - "sentence_nr": 2 + "score": 0.12666372160329223, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.44257167922089413, - "sentence_nr": 2 + "score": 0.2650373529479294, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.21889549804942124, - "sentence_nr": 2 + "score": 0.12162779391619735, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.48502913647746226, - "sentence_nr": 2 + "score": 0.3228288840559658, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2799506947952143, - "sentence_nr": 2 + "score": 0.18294117097472648, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5700777642587023, - "sentence_nr": 2 + "score": 0.4383387744769579, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.203264842568494, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2922087191170089, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2904194745959351, - "sentence_nr": 2 + "score": 0.18237599479708327, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5399262338172586, - "sentence_nr": 2 + "score": 0.3740403511567824, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.21108332811806296, - "sentence_nr": 2 + "score": 0.20298407172594946, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5847750744232335, - "sentence_nr": 2 + "score": 0.427376330935813, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2453238227047589, - "sentence_nr": 2 + "score": 0.1740044679403827, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.55017080577881, - "sentence_nr": 2 + "score": 0.36375152376157177, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.22952177306405494, - "sentence_nr": 2 + "score": 0.12876689524369925, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5279520952576137, - "sentence_nr": 2 + "score": 0.3253153379449275, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.2950615456579434, - "sentence_nr": 2 + "score": 0.15626231814206226, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5675143775417766, - "sentence_nr": 2 + "score": 0.2918712789926548, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.18643403650822063, - "sentence_nr": 2 + "score": 0.322788951728102, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5048280010205698, - "sentence_nr": 2 + "score": 0.40263021320001785, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.1543252261021413, - "sentence_nr": 2 + "score": 0.119159749312327, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.3797469086855575, - "sentence_nr": 2 + "score": 0.21297942664093145, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.34749088141991274, - "sentence_nr": 2 + "score": 0.21397099133614067, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6635709388374615, - "sentence_nr": 2 + "score": 0.3568171392601981, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.16617179744038174, - "sentence_nr": 2 + "score": 0.16925466459550803, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.48117026601244495, - "sentence_nr": 2 + "score": 0.35912398848424326, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.14383758787403153, - "sentence_nr": 2 + "score": 0.2036348471340078, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.42888743368788473, - "sentence_nr": 2 + "score": 0.3472831655579266, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.23584494013034235, - "sentence_nr": 2 + "score": 0.2298971389591186, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.5389135951883217, - "sentence_nr": 2 + "score": 0.45764667682340326, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.011961593826815614, - "sentence_nr": 2 + "score": 0.2327080490816513, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.10171812934151993, - "sentence_nr": 2 + "score": 0.4213315211213489, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "bleu", - "score": 0.27271804425850804, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", "metric": "chrf", - "score": 0.6231583014699292, - "sentence_nr": 2 + "score": 0.24362353508932386, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3520774812078196, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5735788202105873, - "sentence_nr": 2 + "score": 0.2769725060346048, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.2063529291350913, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.41364248023079064, - "sentence_nr": 2 + "score": 0.2568191876426829, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.3618488169166299, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_from", "metric": "chrf", - "score": 0.5708179622131996, - "sentence_nr": 2 + "score": 0.28135849152758385, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.44536846829231563, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.626601305779226, - "sentence_nr": 2 + "score": 0.3238973846683935, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.45286243450930924, - "sentence_nr": 2 + "score": 0.12832055613623328, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.5635810887606836, - "sentence_nr": 2 + "score": 0.19433944404681203, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.2336554865490948, - "sentence_nr": 2 + "score": 0.14482189302397735, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.38695981569355575, - "sentence_nr": 2 + "score": 0.2913876815877049, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.39673576824033097, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.6640084839050983, - "sentence_nr": 2 + "score": 0.37693028676849333, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0.20241924705494113, - "sentence_nr": 2 + "score": 0.14410670132605607, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.4785445547362629, - "sentence_nr": 2 + "score": 0.24025207593480963, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.16306957103469613, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.3291745670182042, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.5666889547785301, - "sentence_nr": 2 + "score": 0.24942094354139677, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.2026004770366011, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.30270818881631195, - "sentence_nr": 2 + "score": 0.9199349282509897, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.27353798204430885, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.4703704190720731, - "sentence_nr": 2 + "score": 0.9199349282509897, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1506914981676572, - "sentence_nr": 2 + "score": 0.21688283061839067, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.49409850038698094, - "sentence_nr": 2 + "score": 0.4479129164961325, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.10586140133972588, - "sentence_nr": 2 + "score": 0.27434065146872866, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4674053477944039, - "sentence_nr": 2 + "score": 0.4551761513917315, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1712766252338756, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5225554962608486, - "sentence_nr": 2 + "score": 0.28112283847231073, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.1549337617358287, - "sentence_nr": 2 + "score": 0.2112174444529806, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.47063780888858964, - "sentence_nr": 2 + "score": 0.37115876147810895, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.087593103737711, - "sentence_nr": 2 + "score": 0.11460384138378832, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.41197148322773003, - "sentence_nr": 2 + "score": 0.32308861733051, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.09993298280365949, - "sentence_nr": 2 + "score": 0.1308613527030366, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.48180385986334856, - "sentence_nr": 2 + "score": 0.3063146286877558, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.41169955008329745, - "sentence_nr": 2 + "score": 0.12030921204016166, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7046826076821049, - "sentence_nr": 2 + "score": 0.2935108999290831, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.11760297043792217, - "sentence_nr": 2 + "score": 0.22974300992320248, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5037771891801089, - "sentence_nr": 2 + "score": 0.35766721538849355, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.12787395553510186, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.21931515993565381, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.31178681104808115, - "sentence_nr": 2 + "score": 0.20236126962624626, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.6235184561135673, - "sentence_nr": 2 + "score": 0.3258121781111335, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.1812045836887171, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.0054987473538930624, - "sentence_nr": 2 + "score": 0.18649703687001343, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18502495276209577, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.49774903659564634, - "sentence_nr": 2 + "score": 0.1441966459257424, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2797290030028961, - "sentence_nr": 2 + "score": 0.23870544239673078, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5092945860838002, - "sentence_nr": 2 + "score": 0.30389718661514126, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2703645496410475, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5129310433304475, - "sentence_nr": 2 + "score": 0.14288815197601673, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2709079038456153, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.447458019441992, - "sentence_nr": 2 + "score": 0.14957316612525498, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.27075075499555246, - "sentence_nr": 2 + "score": 0.17081061355061614, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5079958750910802, - "sentence_nr": 2 + "score": 0.3645499017230567, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.19134492872562123, - "sentence_nr": 2 + "score": 0.26958290276046354, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4261251088174477, - "sentence_nr": 2 + "score": 0.3771988116643981, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.043167422631559454, - "sentence_nr": 2 + "score": 0.27675048474641756, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.14513634182269314, - "sentence_nr": 2 + "score": 0.3780460244391623, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.3428111199165518, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6267480478468405, - "sentence_nr": 2 + "score": 0.3615889761528277, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.20787389114353938, - "sentence_nr": 2 + "score": 0.2758862937563794, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5395612843354369, - "sentence_nr": 2 + "score": 0.4673996585329364, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.12503614625842938, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.20624064341134082, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.19107912313367556, - "sentence_nr": 2 + "score": 0.20588815727980112, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.47457026427329674, - "sentence_nr": 2 + "score": 0.41944461991174653, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.18227211511988975, - "sentence_nr": 2 + "score": 0.28234422994155567, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.26739926509879147, - "sentence_nr": 2 + "score": 0.40408604199549997, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.2244921781886412, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.48547869147117434, - "sentence_nr": 2 + "score": 0.3368893372278425, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.26036802768146033, - "sentence_nr": 2 + "score": 0.13588969750586194, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5255752089611478, - "sentence_nr": 2 + "score": 0.3523239132597748, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.2492031334256811, - "sentence_nr": 2 + "score": 0.1781853859048144, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4923163374806021, - "sentence_nr": 2 + "score": 0.36865727091511874, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.2907608105126149, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5445465034944268, - "sentence_nr": 2 + "score": 0.2961559727627133, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.30219157030008637, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5492288689573782, - "sentence_nr": 2 + "score": 0.2775686235755007, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.2166046272179384, - "sentence_nr": 2 + "score": 0.12416350645592025, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5246154268333804, - "sentence_nr": 2 + "score": 0.23265120010755289, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.17714787947168362, - "sentence_nr": 2 + "score": 0.12846497020051437, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4959629506636555, - "sentence_nr": 2 + "score": 0.2670865602673704, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.37819926894757755, - "sentence_nr": 2 + "score": 0.217554942150074, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6132811919990925, - "sentence_nr": 2 + "score": 0.4362387654025806, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.24793827875738764, - "sentence_nr": 2 + "score": 0.18772266185346026, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5551406879143232, - "sentence_nr": 2 + "score": 0.4516796575038181, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.28252374116432993, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3549531183419122, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.5143871785925975, - "sentence_nr": 2 + "score": 0.12876689524369925, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.7463887744436826, - "sentence_nr": 2 + "score": 0.3034375834959013, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.1568616676699092, - "sentence_nr": 2 + "score": 0.10531636385748798, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3333409033359294, - "sentence_nr": 2 + "score": 0.15775047351971955, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2256379391347521, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4863283179636851, - "sentence_nr": 2 + "score": 0.26128489301072644, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.240340920378981, - "sentence_nr": 2 + "score": 0.18285404868730815, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4624667456597986, - "sentence_nr": 2 + "score": 0.30239028036773985, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.43104504141832617, - "sentence_nr": 2 + "score": 0.1789898550500511, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5953439401847398, - "sentence_nr": 2 + "score": 0.3097165910502381, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.27907188689389983, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5093017176589221, - "sentence_nr": 2 + "score": 0.2126837065505244, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.2276330877377012, - "sentence_nr": 2 + "score": 0.23292164090728384, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.43289197888514347, - "sentence_nr": 2 + "score": 0.4807364086898486, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.28652679283739385, - "sentence_nr": 2 + "score": 0.15742302643532463, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4341260827393413, - "sentence_nr": 2 + "score": 0.3220278551038813, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.21815383167015925, - "sentence_nr": 2 + "score": 0.18889796346849766, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.37444773636111656, - "sentence_nr": 2 + "score": 0.390828018955539, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.38031866584113244, - "sentence_nr": 2 + "score": 0.15404632289830114, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.6100833406476935, - "sentence_nr": 2 + "score": 0.3934040018417114, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0.22991579208155866, - "sentence_nr": 2 + "score": 0.14855426866172083, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.4195683527550329, - "sentence_nr": 2 + "score": 0.4089031318363594, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.1736086198203101, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3517982963278223, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3529455532976322, - "sentence_nr": 2 + "score": 0.15606652450871636, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5455305267266531, - "sentence_nr": 2 + "score": 0.38676973597326414, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2119156724475127, - "sentence_nr": 2 + "score": 0.346045680932875, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.37835612878198044, - "sentence_nr": 2 + "score": 0.4621083930255766, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.23150355132919254, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.48517162463112556, - "sentence_nr": 2 + "score": 0.25751650996406256, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.35592474790742606, - "sentence_nr": 2 + "score": 0.13628770358024436, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5565115125775245, - "sentence_nr": 2 + "score": 0.30464291275706445, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.20863984464930022, - "sentence_nr": 2 + "score": 0.2873180113751827, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.45879801940552783, - "sentence_nr": 2 + "score": 0.3691291664744644, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2695149221768555, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4713033964653895, - "sentence_nr": 2 + "score": 0.21636840076404606, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.08839914053546608, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.13210046935115544, - "sentence_nr": 2 + "score": 0.29331713922012836, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.22669629371608005, - "sentence_nr": 2 + "score": 0.18582826054135923, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.4780713176952279, - "sentence_nr": 2 + "score": 0.27931164611946097, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2062051322624683, - "sentence_nr": 2 + "score": 0.13628770358024436, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.41192098101671093, - "sentence_nr": 2 + "score": 0.196045070105177, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.4351160994155454, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7075991953765537, - "sentence_nr": 2 + "score": 0.22256474447332572, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2505547110465864, - "sentence_nr": 2 + "score": 0.1087256678530004, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.48036570052288885, - "sentence_nr": 2 + "score": 0.1812150267056357, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2164949874511416, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.32736826808813946, - "sentence_nr": 2 + "score": 0.09624090077172921, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.37294595046144213, - "sentence_nr": 2 + "score": 0.27274442393032494, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.598127662881266, - "sentence_nr": 2 + "score": 0.4767475272675149, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.0034593773364647584, - "sentence_nr": 2 + "score": 0.08206174754800233, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.10587910341470286, - "sentence_nr": 2 + "score": 0.24600065227133203, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3295957765387521, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5207914581240252, - "sentence_nr": 2 + "score": 0.2894206730562163, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.27075075499555246, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5201548999535662, - "sentence_nr": 2 + "score": 0.32404902054836443, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.27338789256007584, - "sentence_nr": 2 + "score": 0.13600287923663476, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.5429269981031598, - "sentence_nr": 2 + "score": 0.296137260965594, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "bleu", - "score": 0.2075953797357176, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_from", "metric": "chrf", - "score": 0.4344742362498603, - "sentence_nr": 2 + "score": 0.29939861106519894, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.293816771214877, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.4842449940538771, - "sentence_nr": 2 + "score": 0.2711712970899214, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.2063529291350913, - "sentence_nr": 2 + "score": 0.12748547320686965, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.48123766976272336, - "sentence_nr": 2 + "score": 0.2256255768392581, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "bleu", - "score": 0.11546772122737221, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_from", "metric": "chrf", - "score": 0.2604812246395473, - "sentence_nr": 2 + "score": 0.13897785762455162, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.47203392907202957, - "sentence_nr": 2 + "score": 0.2418791601714353, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.6897980578458137, - "sentence_nr": 2 + "score": 0.3327258966414523, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0.26709890828869226, - "sentence_nr": 2 + "score": 0.18472476303374016, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.5611797797204635, - "sentence_nr": 2 + "score": 0.3755456818093384, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.16879768238484785, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.3658141331541051, - "sentence_nr": 2 + "score": 0.10028126671358768, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.6386889736882309, - "sentence_nr": 2 + "score": 0.21465229625864304, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.07964662206989197, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.08209382469898788, - "sentence_nr": 2 + "score": 0.12004125280185217, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "bleu", - "score": 0.24759502840925565, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_from", "metric": "chrf", - "score": 0.5330580248133261, - "sentence_nr": 2 + "score": 0.13075268692454742, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.2666372228396489, - "sentence_nr": 2 + "score": 0.16340836420369564, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5839132669613946, - "sentence_nr": 2 + "score": 0.36283746947775286, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.3703971546860334, - "sentence_nr": 2 + "score": 0.2012788513843773, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.6509854048597393, - "sentence_nr": 2 + "score": 0.3241416301984527, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "bleu", - "score": 0.3020089249326176, - "sentence_nr": 2 + "score": 0.222140444588514, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_from", "metric": "chrf", - "score": 0.5666791239956741, - "sentence_nr": 2 + "score": 0.29953031595565194, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.3169340575963432, - "sentence_nr": 2 + "score": 0.17092467746295725, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.6047772126282382, - "sentence_nr": 2 + "score": 0.36754216836174997, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.2728224724839342, - "sentence_nr": 2 + "score": 0.2290624375320133, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.5858336859170117, - "sentence_nr": 2 + "score": 0.3703637148185826, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "bleu", - "score": 0.1649662542496744, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_from", "metric": "chrf", - "score": 0.44732894301721, - "sentence_nr": 2 + "score": 0.20316405901471601, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.37801805838989, - "sentence_nr": 2 + "score": 0.2048509043540121, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.6674788880655028, - "sentence_nr": 2 + "score": 0.30660769477099914, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0.20812209921683228, - "sentence_nr": 2 + "score": 0.13583266175611605, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.4762583476044399, - "sentence_nr": 2 + "score": 0.3216755467134329, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.15351923691242442, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.30140436874237964, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.6234338871585586, - "sentence_nr": 2 + "score": 0.2625069486168988, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.13582344277578873, - "sentence_nr": 2 + "score": 0.18398732411042557, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.23233050093887114, - "sentence_nr": 2 + "score": 0.2401204673210609, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "bleu", - "score": 0.27045803893058445, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_from", "metric": "chrf", - "score": 0.5815404493073867, - "sentence_nr": 2 + "score": 0.2557074827472021, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.2667836062177809, - "sentence_nr": 2 + "score": 0.14645066834461026, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.4889374373828587, - "sentence_nr": 2 + "score": 0.3063157759715837, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.1515551103099189, - "sentence_nr": 2 + "score": 0.13937542038981274, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.49455791760408774, - "sentence_nr": 2 + "score": 0.2484339727687842, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "bleu", - "score": 0.1059786102229136, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_from", "metric": "chrf", - "score": 0.2561557976916047, - "sentence_nr": 2 + "score": 0.2737880982480958, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.15507100728722165, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.48322409198286276, - "sentence_nr": 2 + "score": 0.4018398428695008, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.14889095388455822, - "sentence_nr": 2 + "score": 0.13382613080002836, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.41536400762130277, - "sentence_nr": 2 + "score": 0.2537208483050712, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "bleu", - "score": 0.12486557620383446, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_from", "metric": "chrf", - "score": 0.2904789102327634, - "sentence_nr": 2 + "score": 0.2630950732259493, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.3971731969967373, - "sentence_nr": 2 + "score": 0.19834633509680927, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.6965186338399049, - "sentence_nr": 2 + "score": 0.3614699644212251, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0.13551668809076822, - "sentence_nr": 2 + "score": 0.19000532642952978, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.4424571214083723, - "sentence_nr": 2 + "score": 0.2405617210713385, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.13181313433495553, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.22684236479857312, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.2697482929758505, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.6331597127209819, - "sentence_nr": 2 + "score": 0.21010332378415866, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.10369816700638204, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.2527691069954848, - "sentence_nr": 2 + "score": 0.16049139739945859, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "bleu", - "score": 0.11930191477839873, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_from", "metric": "chrf", - "score": 0.26295403442210813, - "sentence_nr": 2 + "score": 0.019878741152560272, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.23919877618601593, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.5302876334280949, - "sentence_nr": 2 + "score": 0.22559518514805962, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.24664751641319077, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.48702383483350364, - "sentence_nr": 2 + "score": 0.10902248103931993, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "bleu", - "score": 0.2103019561790119, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_from", "metric": "chrf", - "score": 0.4375454771782611, - "sentence_nr": 2 + "score": 0.16583300501883477, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.21396075329540654, - "sentence_nr": 2 + "score": 0.2154897801937284, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.5173735729399421, - "sentence_nr": 2 + "score": 0.4083528236853434, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "bleu", - "score": 0.22941797870527758, - "sentence_nr": 2 + "score": 0.1740044679403827, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.42186981875418683, - "sentence_nr": 2 + "score": 0.3783149893116463, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.1465911128169728, - "sentence_nr": 2 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", + "metric": "bleu", + "score": 0.1258273118584677, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_from", "metric": "chrf", - "score": 0.3079988190146739, - "sentence_nr": 2 + "score": 0.3207394929288255, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.35317260323737165, - "sentence_nr": 2 + "score": 0.16574121720327287, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.6408594869465616, - "sentence_nr": 2 + "score": 0.3831242265731415, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.2736255491551285, - "sentence_nr": 2 + "score": 0.25011851152889697, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.5566377410597074, - "sentence_nr": 2 + "score": 0.37126196571844006, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "bleu", - "score": 0.11568463948689758, - "sentence_nr": 2 + "score": 0.05671337518059672, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_from", "metric": "chrf", - "score": 0.3673356694093524, - "sentence_nr": 2 + "score": 0.26251266297831083, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.2940297877008057, - "sentence_nr": 2 + "score": 0.10085167559661873, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.542815022290297, - "sentence_nr": 2 + "score": 0.18125609699008438, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.06203726059862019, - "sentence_nr": 2 + "score": 0.07021707359312077, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.06824072321166202, - "sentence_nr": 2 + "score": 0.22277615768791725, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "bleu", - "score": 0.2119156724475127, - "sentence_nr": 2 + "score": 0.08568635726825895, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_from", "metric": "chrf", - "score": 0.48896874501469645, - "sentence_nr": 2 + "score": 0.15015893458515112, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.3556610867487636, - "sentence_nr": 2 + "score": 0.2511187197601112, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.5896623713361566, - "sentence_nr": 2 + "score": 0.47819642315416905, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.43209473956081024, - "sentence_nr": 2 + "score": 0.3506669990311196, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.6466471725002415, - "sentence_nr": 2 + "score": 0.5110276476843241, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "bleu", - "score": 0.343734330975999, - "sentence_nr": 2 + "score": 0.2823806497463373, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_from", "metric": "chrf", - "score": 0.5961090979865409, - "sentence_nr": 2 + "score": 0.4097065111577764, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.3576272831971449, - "sentence_nr": 2 + "score": 0.1537414828207279, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.6244083944373213, - "sentence_nr": 2 + "score": 0.3722050787080825, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.4251503518017069, - "sentence_nr": 2 + "score": 0.20079789489773447, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.649231006554981, - "sentence_nr": 2 + "score": 0.36306781014142475, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "bleu", - "score": 0.2312335616732656, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_from", "metric": "chrf", - "score": 0.454430450698693, - "sentence_nr": 2 + "score": 0.18254530689454584, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.449694989832495, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.6848441845378601, - "sentence_nr": 2 + "score": 0.2572390116372129, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.33036326194813054, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.5335695187570914, - "sentence_nr": 2 + "score": 0.13966768009198655, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "bleu", - "score": 0.24715873794308874, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_from", "metric": "chrf", - "score": 0.47778531553578674, - "sentence_nr": 2 + "score": 0.2813477376997818, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.4173623671609102, - "sentence_nr": 2 + "score": 0.15014755317658912, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.652879990525409, - "sentence_nr": 2 + "score": 0.37836116314741347, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.20128907818884004, - "sentence_nr": 2 + "score": 0.06225053846006199, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.4174485891003447, - "sentence_nr": 2 + "score": 0.19628052952282465, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "bleu", - "score": 0.3020162743522857, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_from", "metric": "chrf", - "score": 0.5382164286742842, - "sentence_nr": 2 + "score": 0.2461113033172792, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.260409852867913, - "sentence_nr": 2 + "score": 0.13600898159884844, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.4693600515228538, - "sentence_nr": 2 + "score": 0.3230757564905893, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.21544027588567594, - "sentence_nr": 2 + "score": 0.2235267829375094, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.4576381595573422, - "sentence_nr": 2 + "score": 0.4197003930616035, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "bleu", - "score": 0.2545286403887288, - "sentence_nr": 2 + "score": 0.12017396628208415, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_from", "metric": "chrf", - "score": 0.46127229234959366, - "sentence_nr": 2 + "score": 0.17861403940933454, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.24328450115124742, - "sentence_nr": 2 + "score": 0.13181313433495553, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.4776388219964363, - "sentence_nr": 2 + "score": 0.28588995835566733, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.25425921396874424, - "sentence_nr": 2 + "score": 0.1509901905071242, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.42317726915360054, - "sentence_nr": 2 + "score": 0.42614589006990955, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "bleu", - "score": 0.02275832756960615, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_from", "metric": "chrf", - "score": 0.05875660911445021, - "sentence_nr": 2 + "score": 0.16422774333078005, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.347369324256538, - "sentence_nr": 2 + "score": 0.19928950404960785, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.5365384741618354, - "sentence_nr": 2 + "score": 0.3515916946368607, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.24053623916497383, - "sentence_nr": 2 + "score": 0.23693055763743093, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.4750171806233396, - "sentence_nr": 2 + "score": 0.47110207134358734, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "bleu", - "score": 0.17580263472776825, - "sentence_nr": 2 + "score": 0.12322620396842734, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_from", "metric": "chrf", - "score": 0.3076957578052248, - "sentence_nr": 2 + "score": 0.2816556920367689, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.32494178152665526, - "sentence_nr": 2 + "score": 0.1250305362182298, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.5045476589637113, - "sentence_nr": 2 + "score": 0.35192066105839037, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.13181313433495553, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.06347730185717815, - "sentence_nr": 2 + "score": 0.23734991438269704, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "bleu", - "score": 0.29373397057939277, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_from", "metric": "chrf", - "score": 0.4428156698365815, - "sentence_nr": 2 + "score": 0.05708041498286245, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.24874987153684608, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.4814988208653403, - "sentence_nr": 2 + "score": 0.39208241867588406, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.4140011428776289, - "sentence_nr": 2 + "score": 0.12671660613804978, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.6412021306400884, - "sentence_nr": 2 + "score": 0.3813787743264216, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "bleu", - "score": 0.3759002268420169, - "sentence_nr": 2 + "score": 0.16338968219757316, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_from", "metric": "chrf", - "score": 0.6120997127625288, - "sentence_nr": 2 + "score": 0.43516585142042474, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.29037747307996287, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.5309836036249713, - "sentence_nr": 2 + "score": 0.3141940399992296, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 + "score": 0.2175732217739929, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.34244874984732915, - "sentence_nr": 2 + "score": 0.26934434229495274, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "bleu", - "score": 0.1325275948331608, - "sentence_nr": 2 + "score": 0.10434360980785336, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_from", "metric": "chrf", - "score": 0.3205120958231771, - "sentence_nr": 2 + "score": 0.1965525334564022, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.5065435153109259, - "sentence_nr": 2 + "score": 0.19331968002780792, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.7032321712710279, - "sentence_nr": 2 + "score": 0.30653379537681946, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.1768973089848016, - "sentence_nr": 2 + "score": 0.16419136872156925, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.42719621083968545, - "sentence_nr": 2 + "score": 0.38578825514877557, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "bleu", - "score": 0.17901446805295448, - "sentence_nr": 2 + "score": 0.11319316697505612, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_from", "metric": "chrf", - "score": 0.3256054730322296, - "sentence_nr": 2 + "score": 0.3044933526278424, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.40124730128694536, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.6826212173562594, - "sentence_nr": 2 + "score": 0.10372851412328025, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.15196406724218742, - "sentence_nr": 2 + "score": 0.05428552151774627, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.16323864864249038, - "sentence_nr": 2 + "score": 0.08170272572786982, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "bleu", - "score": 0.01246150158758297, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_from", "metric": "chrf", - "score": 0.10376214592757614, - "sentence_nr": 2 + "score": 0.054560358307203495, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.17716893523927718, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.3125133953892873, - "sentence_nr": 2 + "score": 0.2197940423573754, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.22843578925939137, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.39333887911230325, - "sentence_nr": 2 + "score": 0.3252925042873819, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "bleu", - "score": 0.19035778476657209, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_from", "metric": "chrf", - "score": 0.32011375391986463, - "sentence_nr": 2 + "score": 0.2386512909161297, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.1317872333008798, - "sentence_nr": 2 + "score": 0.16894127989367852, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.3352902644692564, - "sentence_nr": 2 + "score": 0.3419460050290285, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.18961182664205528, - "sentence_nr": 2 + "score": 0.3808157877385458, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.3020662042654857, - "sentence_nr": 2 + "score": 0.4468825428794822, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "bleu", - "score": 0.017943510022106292, - "sentence_nr": 2 + "score": 0.0810371533925042, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_from", "metric": "chrf", - "score": 0.0529980883670151, - "sentence_nr": 2 + "score": 0.14661692946967528, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.0027688975221579555, - "sentence_nr": 2 + "score": 0.14062598436731893, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.018304015812073582, - "sentence_nr": 2 + "score": 0.3764695982007195, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0.1620384468018578, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.34746269165329957, - "sentence_nr": 2 + "score": 0.26772209592710927, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.16587560311800356, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.36304298665164114, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.22571464820591175, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.3765675993955885, - "sentence_nr": 2 + "score": 0.19207278754983098, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.10040883216956197, - "sentence_nr": 2 + "score": 0.18980024752692398, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.24797453521432172, - "sentence_nr": 2 + "score": 0.17719555918098742, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "bleu", - "score": 0.06951973289421576, - "sentence_nr": 2 + "score": 0.125919848913809, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_from", "metric": "chrf", - "score": 0.2807452013801013, - "sentence_nr": 2 + "score": 0.12820373892360745, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.21529598963807312, - "sentence_nr": 2 + "score": 0.1968536715007284, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.47472255443386435, - "sentence_nr": 2 + "score": 0.333542500417417, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.19319522417917573, - "sentence_nr": 2 + "score": 0.17016486621490087, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.5294666692683903, - "sentence_nr": 2 + "score": 0.3423671585123332, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "bleu", - "score": 0.1876442538016413, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_from", "metric": "chrf", - "score": 0.45717296303154553, - "sentence_nr": 2 + "score": 0.20172859170422008, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.18002829271425153, - "sentence_nr": 2 + "score": 0.14976409594561182, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4760283281580435, - "sentence_nr": 2 + "score": 0.35357199599186406, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.18247822039542128, - "sentence_nr": 2 + "score": 0.1665583359843711, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.4600264307679993, - "sentence_nr": 2 + "score": 0.2627459510960287, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "bleu", - "score": 0.175300149784418, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_from", "metric": "chrf", - "score": 0.41324550460485304, - "sentence_nr": 2 + "score": 0.26627996704195217, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.24966398552210345, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.5875552237855669, - "sentence_nr": 2 + "score": 0.20740300708624634, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0.18002829271425153, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.47969369143545676, - "sentence_nr": 2 + "score": 0.11206360906932318, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.11591282390598331, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.2522249768533851, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5507570621049205, - "sentence_nr": 2 + "score": 0.19393790238357375, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.15228884803717702, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.14339231237474268, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "bleu", - "score": 0.2099318410412026, - "sentence_nr": 2 + "score": 0.11424261736422782, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_from", "metric": "chrf", - "score": 0.5209999254031551, - "sentence_nr": 2 + "score": 0.2214641710932888, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.22319344534343544, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.47255822473411646, - "sentence_nr": 2 + "score": 0.2354734090463839, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.3431794518924713, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.5291073153069198, - "sentence_nr": 2 + "score": 0.15803708011407422, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "bleu", - "score": 0.2075953797357176, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_from", "metric": "chrf", - "score": 0.418796448457094, - "sentence_nr": 2 + "score": 0.1534362334139164, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.2563564295134795, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.5093318092996159, - "sentence_nr": 2 + "score": 0.029484944886992947, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.2866708602742022, - "sentence_nr": 2 + "score": 0.08447773742536654, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.571127006270378, - "sentence_nr": 2 + "score": 0.13534893625087907, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "bleu", - "score": 0.02295424057510269, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_from", "metric": "chrf", - "score": 0.057552678044422, - "sentence_nr": 2 + "score": 0.09370499064568831, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.4630071026583851, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.667540987593188, - "sentence_nr": 2 + "score": 0.41557169141417455, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.24664751641319077, - "sentence_nr": 2 + "score": 0.2152971203854131, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.5273132664458353, - "sentence_nr": 2 + "score": 0.45844266102619613, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "bleu", - "score": 0.23904922011090457, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_from", "metric": "chrf", - "score": 0.46170328338282635, - "sentence_nr": 2 + "score": 0.3510795373941694, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.27317735109275526, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.5633968676502531, - "sentence_nr": 2 + "score": 0.2603954279622387, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.11726841339493706, - "sentence_nr": 2 + "score": 0.30277029197532107, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.26372027377410673, - "sentence_nr": 2 + "score": 0.4119930658213665, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "bleu", - "score": 0.18523686153564775, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_from", "metric": "chrf", - "score": 0.4293966276650039, - "sentence_nr": 2 + "score": 0.3172363525732528, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.17593291675420053, - "sentence_nr": 2 + "score": 0.09824473131578967, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.45966885600223345, - "sentence_nr": 2 + "score": 0.26161454205511375, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.30749506855677367, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.5615365420131465, - "sentence_nr": 2 + "score": 0.17813207960290023, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "bleu", - "score": 0.2539342198718324, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_from", "metric": "chrf", - "score": 0.48976692911803554, - "sentence_nr": 2 + "score": 0.12613950642084928, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.30768118683142304, - "sentence_nr": 2 + "score": 0.1077448900968642, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.5089115971222962, - "sentence_nr": 2 + "score": 0.1466632434186726, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.19721007805842014, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.35728524815277984, - "sentence_nr": 2 + "score": 0.19398573687939527, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "bleu", - "score": 0.014993257223678707, - "sentence_nr": 2 + "score": 0.11116961409150189, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_from", "metric": "chrf", - "score": 0.05313833864311007, - "sentence_nr": 2 + "score": 0.14407603400456293, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.25109549502043527, - "sentence_nr": 2 + "score": 0.1535686541317235, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.5476903392712803, - "sentence_nr": 2 + "score": 0.3273820311085689, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0.1961515501265013, - "sentence_nr": 2 + "score": 0.1479757676110522, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.48948565712613856, - "sentence_nr": 2 + "score": 0.36882257747840863, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.26565870470756586, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.30685590287990516, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.5269783493077268, - "sentence_nr": 2 + "score": 0.2393868174483411, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.14361580529268292, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.21488656415712018, - "sentence_nr": 2 + "score": 0.16951713127949472, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "bleu", - "score": 0.028831974404303608, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_from", "metric": "chrf", - "score": 0.21532076850060974, - "sentence_nr": 2 + "score": 0.13967106347277614, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.26939482991021874, - "sentence_nr": 2 + "score": 0.5745954681260859, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.564348572305916, - "sentence_nr": 2 + "score": 0.7920051188244848, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.28232804221956187, - "sentence_nr": 2 + "score": 0.6358921902612438, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.639242930472136, - "sentence_nr": 2 + "score": 0.8041899227402122, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.24677721152898274, - "sentence_nr": 2 + "score": 0.6299285159340671, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.5655655793718459, - "sentence_nr": 2 + "score": 0.7993134129243716, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.15162461704349048, - "sentence_nr": 2 + "score": 0.3572514590810421, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.47435908632426016, - "sentence_nr": 2 + "score": 0.40312319760122833, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.06876893749716347, - "sentence_nr": 2 + "score": 0.35059076445515835, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.3746922202825928, - "sentence_nr": 2 + "score": 0.40219803477483124, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.17515913581878667, - "sentence_nr": 2 + "score": 0.41316127706749806, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.5159640302059034, - "sentence_nr": 2 + "score": 0.4430321339435623, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.3114493863658917, - "sentence_nr": 2 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.6199795063284539, - "sentence_nr": 2 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.8780634320789833, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.3977612455119272, - "sentence_nr": 2 + "score": 0.926946700115022, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.7964573357809173, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.8458636471716781, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.2958502265911963, - "sentence_nr": 2 + "score": 0.35601247064914876, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.6081864133794638, - "sentence_nr": 2 + "score": 0.6528728847159075, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.118073003714978, - "sentence_nr": 2 + "score": 0.40673971192998765, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.4725327044603119, - "sentence_nr": 2 + "score": 0.6897190926100627, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.18975464141824344, - "sentence_nr": 2 + "score": 0.3707525915417785, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.483345451709862, - "sentence_nr": 2 + "score": 0.6481906761834414, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.177282908048097, - "sentence_nr": 2 + "score": 0.5724622291345857, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5048008630035653, - "sentence_nr": 2 + "score": 0.6818279156433621, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.24609114091724077, - "sentence_nr": 2 + "score": 0.703373719677874, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5163247162943534, - "sentence_nr": 2 + "score": 0.7784050705257474, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.20586736678432452, - "sentence_nr": 2 + "score": 0.469958733898233, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5290915360201753, - "sentence_nr": 2 + "score": 0.5843756060033074, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.29019853911874177, - "sentence_nr": 2 + "score": 0.2676232320051144, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.5949894213257197, - "sentence_nr": 2 + "score": 0.5440246804235981, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.3070367955852388, - "sentence_nr": 2 + "score": 0.3489926819498492, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.6242613917648033, - "sentence_nr": 2 + "score": 0.5715668842319502, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.16310803315335595, - "sentence_nr": 2 + "score": 0.2786169604662155, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.385251224878983, - "sentence_nr": 2 + "score": 0.5267252236203236, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.2757308936630587, - "sentence_nr": 2 + "score": 0.3765213224289163, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.6126334851686046, - "sentence_nr": 2 + "score": 0.6469521424555786, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.21805912847922992, - "sentence_nr": 2 + "score": 0.3410244689880313, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.5462064223958267, - "sentence_nr": 2 + "score": 0.5816669416914216, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.2189685645981704, - "sentence_nr": 2 + "score": 0.4234343012313773, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.4764545724040846, - "sentence_nr": 2 + "score": 0.6625289905598352, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.19770260950111818, - "sentence_nr": 2 + "score": 0.44219732271776674, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.4864234961050757, - "sentence_nr": 2 + "score": 0.6193429426274062, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.1281637706417447, - "sentence_nr": 2 + "score": 0.4324680011853555, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.26820390293632596, - "sentence_nr": 2 + "score": 0.5877600878871951, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.17178273549483283, - "sentence_nr": 2 + "score": 0.4493940083619696, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.507628318520069, - "sentence_nr": 2 + "score": 0.6230960824462234, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.05670064571372339, - "sentence_nr": 2 + "score": 0.6638859619095425, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.16937000725041657, - "sentence_nr": 2 + "score": 0.7874224590682172, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.4219264367109449, - "sentence_nr": 2 + "score": 0.6947677373756656, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5790052627496669, - "sentence_nr": 2 + "score": 0.7941300666655116, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.21550905403743137, - "sentence_nr": 2 + "score": 0.6412098671661826, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.44719679117350436, - "sentence_nr": 2 + "score": 0.7665040244283648, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.27007601385228264, - "sentence_nr": 2 + "score": 0.4845227999608418, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.4458020398892479, - "sentence_nr": 2 + "score": 0.5968050469845498, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.25300577345515013, - "sentence_nr": 2 + "score": 0.3861375213265022, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.44175219977629965, - "sentence_nr": 2 + "score": 0.5122109329134508, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 + "score": 0.32539921259497445, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.4024289879771283, - "sentence_nr": 2 + "score": 0.5133457276293165, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.4216050739426583, - "sentence_nr": 2 + "score": 0.47269414327373943, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.6728600493371104, - "sentence_nr": 2 + "score": 0.7451099574206652, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.2756885721075884, - "sentence_nr": 2 + "score": 0.4354194543126476, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5307227437376365, - "sentence_nr": 2 + "score": 0.6656175329857803, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.5941815558294462, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.7983203558832153, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.21991348529919003, - "sentence_nr": 2 + "score": 0.4143449478847806, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.50217348570388, - "sentence_nr": 2 + "score": 0.6736569430464404, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.14089726824262236, - "sentence_nr": 2 + "score": 0.3983045920261205, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.29585146611510377, - "sentence_nr": 2 + "score": 0.6521777108605036, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.19420534060688374, - "sentence_nr": 2 + "score": 0.268203877206376, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.432678989151994, - "sentence_nr": 2 + "score": 0.5791297455379081, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.19946335945716726, - "sentence_nr": 2 + "score": 0.3720001389308944, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5412386252302255, - "sentence_nr": 2 + "score": 0.7013845085492982, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.27966169949383496, - "sentence_nr": 2 + "score": 0.17329990217896798, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.537239861484062, - "sentence_nr": 2 + "score": 0.583781848253705, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.32980384185673844, - "sentence_nr": 2 + "score": 0.3373556859342653, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.6175883753955328, - "sentence_nr": 2 + "score": 0.6738789170291255, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.31318569084293774, - "sentence_nr": 2 + "score": 0.4494597917400064, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.6117611701035811, - "sentence_nr": 2 + "score": 0.7073438158390085, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.33705661165643946, - "sentence_nr": 2 + "score": 0.4446730260276365, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.624031397469919, - "sentence_nr": 2 + "score": 0.755908468739292, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.037115030218903694, - "sentence_nr": 2 + "score": 0.43478132178539325, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.1788805094660807, - "sentence_nr": 2 + "score": 0.7018196083546635, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.3860080723844807, - "sentence_nr": 2 + "score": 0.3268845394941929, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.6983645725285298, - "sentence_nr": 2 + "score": 0.5177343510524726, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.27353964831796046, - "sentence_nr": 2 + "score": 0.28038937103419465, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.5222330343086904, - "sentence_nr": 2 + "score": 0.5038494750471553, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.25718495991757767, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4369258127692308, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.32310721474905496, - "sentence_nr": 2 + "score": 0.2598777182656361, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.6324724806146809, - "sentence_nr": 2 + "score": 0.5891629072611128, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0982831645960075, - "sentence_nr": 2 + "score": 0.2989825848955476, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.1272762669734629, - "sentence_nr": 2 + "score": 0.5979372541912472, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.20113161707875454, - "sentence_nr": 2 + "score": 0.21832405319137094, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.5185589580301894, - "sentence_nr": 2 + "score": 0.538599666863523, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.14382854899355546, - "sentence_nr": 2 + "score": 0.15922876554252086, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.26400383568118985, - "sentence_nr": 2 + "score": 0.39205452902204047, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2607899218485117, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.21514404656488983, - "sentence_nr": 2 + "score": 0.48702498962539814, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.19112125497758137, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.22551384015559367, - "sentence_nr": 2 + "score": 0.39870104995538236, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.1314926852888956, - "sentence_nr": 2 + "score": 0.1061903685456685, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.2730576168913583, - "sentence_nr": 2 + "score": 0.48402103757719056, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.10759353804031296, - "sentence_nr": 2 + "score": 0.16158718602920916, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.2469469161435681, - "sentence_nr": 2 + "score": 0.529420842984689, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.02005744277065384, - "sentence_nr": 2 + "score": 0.11815153359326112, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.1517015859852234, - "sentence_nr": 2 + "score": 0.4449148298646824, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.14463738798777623, - "sentence_nr": 2 + "score": 0.4840168429540741, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.2589218890086074, - "sentence_nr": 2 + "score": 0.708011141564384, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.13690362900208325, - "sentence_nr": 2 + "score": 0.4280674425361325, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.2762422431482146, - "sentence_nr": 2 + "score": 0.6762864074630727, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.10560514433971112, - "sentence_nr": 2 + "score": 0.3476608425783186, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.22163408505698107, - "sentence_nr": 2 + "score": 0.6262956160259413, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.05392295714154769, - "sentence_nr": 2 + "score": 0.260560100033495, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.24000092793563588, - "sentence_nr": 2 + "score": 0.5367621571346236, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.10533275933768531, - "sentence_nr": 2 + "score": 0.31513262466552094, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.1454909685200551, - "sentence_nr": 2 + "score": 0.6088063321214817, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.09541186197466851, - "sentence_nr": 2 + "score": 0.328480036965797, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.1446377197560293, - "sentence_nr": 2 + "score": 0.5845846160692275, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.30538385012782954, - "sentence_nr": 2 + "score": 0.34818832435375924, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5121153023805728, - "sentence_nr": 2 + "score": 0.6021958119247758, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.21889549804942124, - "sentence_nr": 2 + "score": 0.3357114888560433, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.3940841212708787, - "sentence_nr": 2 + "score": 0.577627721559994, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.1882889817107982, - "sentence_nr": 2 + "score": 0.165917178563774, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.3522812586532728, - "sentence_nr": 2 + "score": 0.47788177649200114, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.23766627182164174, - "sentence_nr": 2 + "score": 0.3962150993148768, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.44815154837573024, - "sentence_nr": 2 + "score": 0.6615791069918547, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.28175950490399515, - "sentence_nr": 2 + "score": 0.2715808911127579, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.5034953110542267, - "sentence_nr": 2 + "score": 0.5912854463960201, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.006822057717339712, - "sentence_nr": 2 + "score": 0.3561131937728394, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.04590519704218084, - "sentence_nr": 2 + "score": 0.6360822051576717, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.4506022621318761, - "sentence_nr": 2 + "score": 0.30843187897727053, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.6366569943698206, - "sentence_nr": 2 + "score": 0.3486791970487533, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.2044537965576019, - "sentence_nr": 2 + "score": 0.30006024473641996, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.41662837209065434, - "sentence_nr": 2 + "score": 0.3256826624869121, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.20022216695167708, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.25475796453209737, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.2803589793821332, - "sentence_nr": 2 + "score": 0.23057388791871672, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.5139077856801352, - "sentence_nr": 2 + "score": 0.28722063151007776, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.18318367941785624, - "sentence_nr": 2 + "score": 0.20138961649645912, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.3112497745580003, - "sentence_nr": 2 + "score": 0.31544376598852375, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.2188460064841618, - "sentence_nr": 2 + "score": 0.18010353259801426, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.4111151775845119, - "sentence_nr": 2 + "score": 0.2514369893270279, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.3618488169166299, - "sentence_nr": 2 + "score": 0.3878454500259306, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6178847628712388, - "sentence_nr": 2 + "score": 0.6551636735823242, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.431319746325093, - "sentence_nr": 2 + "score": 0.3575554083195387, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6234382849939584, - "sentence_nr": 2 + "score": 0.6357905858551194, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.3161306379595585, - "sentence_nr": 2 + "score": 0.38341396472331346, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6012304838142994, - "sentence_nr": 2 + "score": 0.6437362313865919, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.3579300370931225, - "sentence_nr": 2 + "score": 0.36350084013796624, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.573996761928517, - "sentence_nr": 2 + "score": 0.6256665663158201, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.3080840787435305, - "sentence_nr": 2 + "score": 0.3212586102862301, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.4885219468370561, - "sentence_nr": 2 + "score": 0.6200344079713044, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.24325558783239473, - "sentence_nr": 2 + "score": 0.29542603524950894, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.39066410297361315, - "sentence_nr": 2 + "score": 0.6045890208480269, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.5356579160614433, - "sentence_nr": 2 + "score": 0.31094933095062055, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.7277322041803868, - "sentence_nr": 2 + "score": 0.49068855707946196, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.1738168213092765, - "sentence_nr": 2 + "score": 0.26045893297751727, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.43714714564599644, - "sentence_nr": 2 + "score": 0.4556002624646449, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.25769536520621106, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4479264653484257, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.4265406506976777, - "sentence_nr": 2 + "score": 0.3833596037687312, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.6947449483028119, - "sentence_nr": 2 + "score": 0.6303767331020778, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.12284465579716894, - "sentence_nr": 2 + "score": 0.33202003790347423, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.3000539818655044, - "sentence_nr": 2 + "score": 0.6008057921287521, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.13919020336387739, - "sentence_nr": 2 + "score": 0.5169325988486032, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.4856921552489764, - "sentence_nr": 2 + "score": 0.7139612818852349, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.31487248334376844, - "sentence_nr": 2 + "score": 0.5350553356274835, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5635244346599635, - "sentence_nr": 2 + "score": 0.6061404961531679, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.22897967367089514, - "sentence_nr": 2 + "score": 0.48468513699279653, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5334911242844559, - "sentence_nr": 2 + "score": 0.627759796330558, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.26709890828869226, - "sentence_nr": 2 + "score": 0.30044261567697145, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5042111985234817, - "sentence_nr": 2 + "score": 0.5072959120235889, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.334422418242443, - "sentence_nr": 2 + "score": 0.16111837221606704, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.5300778295156336, - "sentence_nr": 2 + "score": 0.3897870969473364, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.15215820165380464, - "sentence_nr": 2 + "score": 0.10167233309625472, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.3730291976418706, - "sentence_nr": 2 + "score": 0.3890033983715864, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.008068095572196444, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.06582628108710774, - "sentence_nr": 2 + "score": 0.3452100271202966, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.41092285395615147, - "sentence_nr": 2 + "score": 0.29945813065831656, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.627562152141329, - "sentence_nr": 2 + "score": 0.5652964318283186, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.21255280545319827, - "sentence_nr": 2 + "score": 0.3118864893794733, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.4650106669780648, - "sentence_nr": 2 + "score": 0.6025015600895789, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.21130045509267714, - "sentence_nr": 2 + "score": 0.4136053909671976, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.39738842872135566, - "sentence_nr": 2 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6322647535881054, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.2852158256303108, - "sentence_nr": 2 + "score": 0.2170322997863664, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.5592687569674767, - "sentence_nr": 2 + "score": 0.5313522319806132, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.2063890416514164, - "sentence_nr": 2 + "score": 0.26361261152036314, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.36145113398437073, - "sentence_nr": 2 + "score": 0.5725817956180734, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.23158047038710655, - "sentence_nr": 2 + "score": 0.1678792928110798, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.5061207857603639, - "sentence_nr": 2 + "score": 0.5011442124633559, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.4218999224827276, - "sentence_nr": 2 + "score": 0.40476128586074045, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.6489282208332532, - "sentence_nr": 2 + "score": 0.5819090999793647, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.26356793966181546, - "sentence_nr": 2 + "score": 0.31924314606108933, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.5191302272110829, - "sentence_nr": 2 + "score": 0.5643587039540041, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.20298700573422315, - "sentence_nr": 2 + "score": 0.46465436703612695, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.3905231106721993, - "sentence_nr": 2 + "score": 0.6187472056628097, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.1495562478801698, - "sentence_nr": 2 + "score": 0.23765992259974003, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.27927416817557615, - "sentence_nr": 2 + "score": 0.555280304898008, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.18014267897960143, - "sentence_nr": 2 + "score": 0.14222805709331154, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.3494520347780682, - "sentence_nr": 2 + "score": 0.4996646755261327, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.09315655656376064, - "sentence_nr": 2 + "score": 0.19867232982300087, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.2991354305100017, - "sentence_nr": 2 + "score": 0.4721193400035613, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.5247574818133272, - "sentence_nr": 2 + "score": 0.3540078976511945, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.7067933534801065, - "sentence_nr": 2 + "score": 0.5545213530835353, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.24328450115124742, - "sentence_nr": 2 + "score": 0.30065550001026614, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.5060248466367836, - "sentence_nr": 2 + "score": 0.5078021900511434, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.2861877593848395, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.497612333201281, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.21262671745682374, - "sentence_nr": 2 + "score": 0.26031375400822604, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.5260755429226434, - "sentence_nr": 2 + "score": 0.6435901265581367, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.15235290224609707, - "sentence_nr": 2 + "score": 0.28663599743133195, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.28302702194787677, - "sentence_nr": 2 + "score": 0.6523798359183501, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.21486676443988736, - "sentence_nr": 2 + "score": 0.26536873046244686, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.36871818136959744, - "sentence_nr": 2 + "score": 0.6087318332086314, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.3563758622144919, - "sentence_nr": 2 + "score": 0.16720443618568337, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.5746238432846977, - "sentence_nr": 2 + "score": 0.49728602865615185, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.34637568582379935, - "sentence_nr": 2 + "score": 0.2237271965508778, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.5514391895148156, - "sentence_nr": 2 + "score": 0.5188036228566616, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.24720511037119816, - "sentence_nr": 2 + "score": 0.1758067209813282, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.4462551342337241, - "sentence_nr": 2 + "score": 0.4901843428646942, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.35925132999878095, - "sentence_nr": 2 + "score": 0.11927023953333708, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.5699819571052286, - "sentence_nr": 2 + "score": 0.30235008020722276, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.38359501185588124, - "sentence_nr": 2 + "score": 0.23340024365107623, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.5602603893622257, - "sentence_nr": 2 + "score": 0.3615501139102133, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 + "score": 0.05099372834862051, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.4026544277401918, - "sentence_nr": 2 + "score": 0.19652888728631965, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.49510953811541075, - "sentence_nr": 2 + "score": 0.1552412395318371, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.7352883459765454, - "sentence_nr": 2 + "score": 0.46901193090489374, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.2981343875223826, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.477518453847399, - "sentence_nr": 2 + "score": 0.3628388824307182, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.2026004770366011, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.43250936001873813, - "sentence_nr": 2 + "score": 0.3074263382421554, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.3564982433809234, - "sentence_nr": 2 + "score": 0.33472487399064255, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.5955310812920889, - "sentence_nr": 2 + "score": 0.5323967201407147, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.15235290224609707, - "sentence_nr": 2 + "score": 0.2873736840134788, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.27566734637316337, - "sentence_nr": 2 + "score": 0.5010578596526507, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.2269289302318667, - "sentence_nr": 2 + "score": 0.23664302078708985, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4761409742673413, - "sentence_nr": 2 + "score": 0.4547035743969898, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.23380867598952562, - "sentence_nr": 2 + "score": 0.2262279556088844, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.4731313764465835, - "sentence_nr": 2 + "score": 0.5760442354634835, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.32326983669535764, - "sentence_nr": 2 + "score": 0.2531889405970385, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.5757950493268048, - "sentence_nr": 2 + "score": 0.6038946560162178, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.2920934313715234, - "sentence_nr": 2 + "score": 0.21931836015563075, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.4983574989743429, - "sentence_nr": 2 + "score": 0.5700300210408424, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.3026558282583112, - "sentence_nr": 2 + "score": 0.24134481763339574, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.4694574798065052, - "sentence_nr": 2 + "score": 0.5039460152723452, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.21310996044302127, - "sentence_nr": 2 + "score": 0.2348279301335576, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.41481507363997655, - "sentence_nr": 2 + "score": 0.5004871814463647, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.014046579612901993, - "sentence_nr": 2 + "score": 0.08237331643927553, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.06227213209427425, - "sentence_nr": 2 + "score": 0.39243470995755453, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.3710777220377626, - "sentence_nr": 2 + "score": 0.2142336001450868, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.6158333675751357, - "sentence_nr": 2 + "score": 0.5588585581750418, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.22797230914240135, - "sentence_nr": 2 + "score": 0.09648154986811164, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.49559704046335284, - "sentence_nr": 2 + "score": 0.445584066700863, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.18089468280268076, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5010395255727423, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.2680102298488869, - "sentence_nr": 2 + "score": 0.3101522533089342, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.5563627216923304, - "sentence_nr": 2 + "score": 0.604637604976383, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.2082228064731165, - "sentence_nr": 2 + "score": 0.38075337128063264, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3453459075359105, - "sentence_nr": 2 + "score": 0.6303731272422745, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.23462825598816128, - "sentence_nr": 2 + "score": 0.31147021123578866, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.4659487960951427, - "sentence_nr": 2 + "score": 0.5943977261450569, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.1529466247397943, - "sentence_nr": 2 + "score": 0.3167263933365905, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.4275296567755792, - "sentence_nr": 2 + "score": 0.5418529871586636, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.309848051124064, - "sentence_nr": 2 + "score": 0.22655843030546713, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.5622431891031534, - "sentence_nr": 2 + "score": 0.4345585649515925, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.2516768028374535, - "sentence_nr": 2 + "score": 0.3568884258190913, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.5052262603078841, - "sentence_nr": 2 + "score": 0.554204547339302, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.21090682439932437, - "sentence_nr": 2 + "score": 0.3954105975011071, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.5250285784398728, - "sentence_nr": 2 + "score": 0.665702837892058, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.1286693574614936, - "sentence_nr": 2 + "score": 0.31671795070859093, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.4441562924808585, - "sentence_nr": 2 + "score": 0.5824028981642948, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.037035772240318204, - "sentence_nr": 2 + "score": 0.2891201026404864, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.13309517159270826, - "sentence_nr": 2 + "score": 0.577907772782264, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.37081839104772296, - "sentence_nr": 2 + "score": 0.1440960867359316, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.6389376736347167, - "sentence_nr": 2 + "score": 0.317244026262994, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.19360150634553708, - "sentence_nr": 2 + "score": 0.09722244673293699, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.47334926984767134, - "sentence_nr": 2 + "score": 0.27113802032744583, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.13367647104732472, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.1416110914766342, - "sentence_nr": 2 + "score": 0.051791256473099075, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.45649224665512106, - "sentence_nr": 2 + "score": 0.365621155540599, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.13700830775707343, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.269069871167757, - "sentence_nr": 2 + "score": 0.3315860669681651, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.20310341961604592, - "sentence_nr": 2 + "score": 0.04012260068601062, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.4991920771058773, - "sentence_nr": 2 + "score": 0.2696128056189901, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.2380050699329688, - "sentence_nr": 2 + "score": 0.33711190260900914, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.48227504945496735, - "sentence_nr": 2 + "score": 0.5734011889114915, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.18624263881830802, - "sentence_nr": 2 + "score": 0.5580417299024397, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4914113027832365, - "sentence_nr": 2 + "score": 0.6857939145706653, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.2395446927992299, - "sentence_nr": 2 + "score": 0.18742174533907147, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4721484222602001, - "sentence_nr": 2 + "score": 0.5476333057836739, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.19228093786407296, - "sentence_nr": 2 + "score": 0.23173637276917056, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.4484750286265722, - "sentence_nr": 2 + "score": 0.4688999062248428, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.3110340303714584, - "sentence_nr": 2 + "score": 0.15237690319868696, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.5519716855578684, - "sentence_nr": 2 + "score": 0.4251100485788087, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.01702725324941803, - "sentence_nr": 2 + "score": 0.21347502421469977, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.08157175569560395, - "sentence_nr": 2 + "score": 0.45172058784807567, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.26887618694630055, - "sentence_nr": 2 + "score": 0.23160979893862282, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.5500003011471738, - "sentence_nr": 2 + "score": 0.5307682448377248, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.12680676386575712, - "sentence_nr": 2 + "score": 0.14917391092771218, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.4095969427556632, - "sentence_nr": 2 + "score": 0.5422937768900143, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3800143437750757, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.2533922732210082, - "sentence_nr": 2 + "score": 0.4541968077704817, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.546233396176927, - "sentence_nr": 2 + "score": 0.6895568152401309, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.12830494902442313, - "sentence_nr": 2 + "score": 0.5714674895346293, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.3254705907811071, - "sentence_nr": 2 + "score": 0.7513731235157427, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.19601813136817495, - "sentence_nr": 2 + "score": 0.5133263413524151, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.46178888135061663, - "sentence_nr": 2 + "score": 0.7211829449070788, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.20251299853063762, - "sentence_nr": 2 + "score": 0.35051963517598583, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.36192674925462354, - "sentence_nr": 2 + "score": 0.5957877277673226, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.12015228994776961, - "sentence_nr": 2 + "score": 0.3574343577949326, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.31437848676811814, - "sentence_nr": 2 + "score": 0.6497698769139355, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.17979384730979156, - "sentence_nr": 2 + "score": 0.40103978472952795, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.331355254735914, - "sentence_nr": 2 + "score": 0.6208328012476202, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.07956863030147791, - "sentence_nr": 2 + "score": 0.08894652425495941, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.27552150746832194, - "sentence_nr": 2 + "score": 0.41017528256257657, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.11845246173688026, - "sentence_nr": 2 + "score": 0.22464540307431463, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.2839119989582471, - "sentence_nr": 2 + "score": 0.5478473333306936, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.16636906484233852, - "sentence_nr": 2 + "score": 0.12081705083061788, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.28479590380073244, - "sentence_nr": 2 + "score": 0.42330662612715014, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.37042346597404774, - "sentence_nr": 2 + "score": 0.10809027944114537, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.5429200608002012, - "sentence_nr": 2 + "score": 0.41709635067440054, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.19938701089073135, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.3122788337958431, - "sentence_nr": 2 + "score": 0.24967258145788696, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2221215858584901, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.2152878500188154, - "sentence_nr": 2 + "score": 0.14721095611762033, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.36210098378060424, - "sentence_nr": 2 + "score": 0.5659463345055759, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0361216793750198, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.15096755094203476, - "sentence_nr": 2 + "score": 0.3833384649248694, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.23814364645160635, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.41371751248867406, - "sentence_nr": 2 + "score": 0.3068895477125971, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.3291256332376796, - "sentence_nr": 2 + "score": 0.10421454499099432, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.5670250015789864, - "sentence_nr": 2 + "score": 0.4482718797876954, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.3422882142242731, - "sentence_nr": 2 + "score": 0.09809739305025474, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.5278861608717469, - "sentence_nr": 2 + "score": 0.43620207901811114, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.38564863816921563, - "sentence_nr": 2 + "score": 0.06617762054068453, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.4887006722841345, - "sentence_nr": 2 + "score": 0.37651507936877043, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.23177043441348452, - "sentence_nr": 2 + "score": 0.30512514107707417, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.40414889866804304, - "sentence_nr": 2 + "score": 0.5253825791967487, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.24293747431834264, - "sentence_nr": 2 + "score": 0.29026498240122633, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.43597971711109645, - "sentence_nr": 2 + "score": 0.5281110282763906, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.18613958857202265, - "sentence_nr": 2 + "score": 0.21608519654486127, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.33365565390885554, - "sentence_nr": 2 + "score": 0.48049705425952316, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.5095567171193034, - "sentence_nr": 2 + "score": 0.1976518550814801, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.7144582369592706, - "sentence_nr": 2 + "score": 0.5244055159079589, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.31286911900957376, - "sentence_nr": 2 + "score": 0.26410908345664563, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.5694292229267216, - "sentence_nr": 2 + "score": 0.5677920940498072, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.23939069272230887, - "sentence_nr": 2 + "score": 0.2588340456184726, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.33856380909095196, - "sentence_nr": 2 + "score": 0.55968436904969, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.3563982585943877, - "sentence_nr": 2 + "score": 0.3004098099879873, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.49354275608420073, - "sentence_nr": 2 + "score": 0.47263075055005355, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.19282932367379912, - "sentence_nr": 2 + "score": 0.357133009467308, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.2575059813192898, - "sentence_nr": 2 + "score": 0.5076629572579336, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.17208226602890947, - "sentence_nr": 2 + "score": 0.3317061376682229, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.25645334428566413, - "sentence_nr": 2 + "score": 0.49116761313547225, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.2489574113984516, - "sentence_nr": 2 + "score": 0.07426303717403786, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.5438702135465744, - "sentence_nr": 2 + "score": 0.32419436785022854, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.25119117418063647, - "sentence_nr": 2 + "score": 0.32990225159044795, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.5358947011982449, - "sentence_nr": 2 + "score": 0.5675673358712833, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.24117223077042385, - "sentence_nr": 2 + "score": 0.10176396205307862, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.512020635779483, - "sentence_nr": 2 + "score": 0.32135019548795624, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.3340823391039827, - "sentence_nr": 2 + "score": 0.311511414478067, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.5653541300306412, - "sentence_nr": 2 + "score": 0.6033227598801263, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.2386641827505274, - "sentence_nr": 2 + "score": 0.22779349070551677, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.4975758233208786, - "sentence_nr": 2 + "score": 0.49148253463628344, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.21543832976633895, - "sentence_nr": 2 + "score": 0.22115130993130303, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.3494273034291109, - "sentence_nr": 2 + "score": 0.5521258614574281, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.3505419761309475, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.5987069983604556, - "sentence_nr": 2 + "score": 0.20542693687806493, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.21815383167015925, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.4982958514803751, - "sentence_nr": 2 + "score": 0.07296118741828052, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.2297132059983132, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.44671126187287913, - "sentence_nr": 2 + "score": 0.2422245788581219, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.27336087678628246, - "sentence_nr": 2 + "score": 0.09504015903132065, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.5594458824515739, - "sentence_nr": 2 + "score": 0.4293817955338464, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.2082228064731165, - "sentence_nr": 2 + "score": 0.15438932814303225, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.3245390541494279, - "sentence_nr": 2 + "score": 0.4752857803786606, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.24241809604223485, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.5139906329232172, - "sentence_nr": 2 + "score": 0.3324917175608352, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.23020656163897005, - "sentence_nr": 2 + "score": 0.40473260822223667, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5608590094117443, - "sentence_nr": 2 + "score": 0.7095471138795382, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.2988707080433144, - "sentence_nr": 2 + "score": 0.2952023404990282, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5286791480233601, - "sentence_nr": 2 + "score": 0.5787510650657961, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.29215021962379045, - "sentence_nr": 2 + "score": 0.27191061976527564, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5527751145536495, - "sentence_nr": 2 + "score": 0.5817893211228, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.28375086204441347, - "sentence_nr": 2 + "score": 0.417977745559327, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.6048386743476, - "sentence_nr": 2 + "score": 0.6309214915513178, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.3397920703569073, - "sentence_nr": 2 + "score": 0.37902282569315715, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.5935411202589298, - "sentence_nr": 2 + "score": 0.6372123873874835, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.1807114773593583, - "sentence_nr": 2 + "score": 0.2635271766530794, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.4487816742581557, - "sentence_nr": 2 + "score": 0.6035397157843727, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.48923776114221357, - "sentence_nr": 2 + "score": 0.4064316590586192, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.7423815677937077, - "sentence_nr": 2 + "score": 0.6473114303275844, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.23010506054463412, - "sentence_nr": 2 + "score": 0.2855640284366791, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.5562150832865256, - "sentence_nr": 2 + "score": 0.5870028589262669, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.3535072546729628, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.6067505948373164, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.37456291810610803, - "sentence_nr": 2 + "score": 0.5169107752364505, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.6571573359746666, - "sentence_nr": 2 + "score": 0.6794853149665443, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.1351928705126306, - "sentence_nr": 2 + "score": 0.5353289105105035, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.28664413593577004, - "sentence_nr": 2 + "score": 0.7178935320712556, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.19375900698784013, - "sentence_nr": 2 + "score": 0.5510544576491839, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.5286306121502188, - "sentence_nr": 2 + "score": 0.6898993575925141, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.1931328662607509, - "sentence_nr": 2 + "score": 0.30587159272611075, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.43550456875371113, - "sentence_nr": 2 + "score": 0.4856692118018501, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.3394516832204828, - "sentence_nr": 2 + "score": 0.3739787291921192, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.5487992573856032, - "sentence_nr": 2 + "score": 0.5302315354733708, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 + "score": 0.03630887444624427, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.4082367628634589, - "sentence_nr": 2 + "score": 0.18449043718721744, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.3623657040262751, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.4992077491214507, - "sentence_nr": 2 + "score": 0.4324660136998897, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.2058069729608087, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.4019360953517204, - "sentence_nr": 2 + "score": 0.444143437866834, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.11350940547542104, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.23977961412737073, - "sentence_nr": 2 + "score": 0.28337457669614124, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.39382730058430515, - "sentence_nr": 2 + "score": 0.11457550862274267, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.6347386700093041, - "sentence_nr": 2 + "score": 0.4234525365021209, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.19418711990577714, - "sentence_nr": 2 + "score": 0.11506534027158988, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.41416182549648484, - "sentence_nr": 2 + "score": 0.45183832289757403, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.07653930552226329, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3454416722587219, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.36432536049590997, - "sentence_nr": 2 + "score": 0.15141938460939539, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.6094788325888109, - "sentence_nr": 2 + "score": 0.547291242951542, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.016148804908990694, - "sentence_nr": 2 + "score": 0.06453967836796516, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.059937646349923615, - "sentence_nr": 2 + "score": 0.4154818199721149, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.26871972706804337, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.5406826424997226, - "sentence_nr": 2 + "score": 0.2933101487116941, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.29222881654408056, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.6120984237392771, - "sentence_nr": 2 + "score": 0.26075278487602266, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.30956660793759877, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.6188773222172356, - "sentence_nr": 2 + "score": 0.303183815224326, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.14588825992287732, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.39984326863280045, - "sentence_nr": 2 + "score": 0.20679375298337374, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.17890209808948412, - "sentence_nr": 2 + "score": 0.3076750746495824, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.4299320286626716, - "sentence_nr": 2 + "score": 0.46542265321304765, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.15196406724218744, - "sentence_nr": 2 + "score": 0.2317209329773014, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.43726288798668184, - "sentence_nr": 2 + "score": 0.43185448543679017, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.09010469109887134, - "sentence_nr": 2 + "score": 0.2909986146455974, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.27026073072610995, - "sentence_nr": 2 + "score": 0.47042977301741506, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.5288343547782808, - "sentence_nr": 2 + "score": 0.25506473041366917, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.7969290554899139, - "sentence_nr": 2 + "score": 0.5698114406236083, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.19772527187680455, - "sentence_nr": 2 + "score": 0.20175458741739072, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.5389440496417608, - "sentence_nr": 2 + "score": 0.5445587371263559, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.2575855673150857, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5493846271661655, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.4330803865682828, - "sentence_nr": 2 + "score": 0.07272528883632293, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.7589029338970602, - "sentence_nr": 2 + "score": 0.404446825928217, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.10624793541906809, - "sentence_nr": 2 + "score": 0.1337989438650252, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.32563569843178114, - "sentence_nr": 2 + "score": 0.5813867224396951, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.23416092054895646, - "sentence_nr": 2 + "score": 0.08617288043560194, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.5474842887821844, - "sentence_nr": 2 + "score": 0.3776558917820924, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.07938718996384923, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.38935973617512226, - "sentence_nr": 2 + "score": 0.18817142418518176, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.09100730294865149, - "sentence_nr": 2 + "score": 0.07201542184499009, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.4152991006861775, - "sentence_nr": 2 + "score": 0.24935341247163162, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.12576299804399627, - "sentence_nr": 2 + "score": 0.09121255946496629, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.4275740936545043, - "sentence_nr": 2 + "score": 0.23409194618638485, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.1258768401036426, - "sentence_nr": 2 + "score": 0.20019796122672592, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.4861652311370069, - "sentence_nr": 2 + "score": 0.4810502579226327, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.10541304780958503, - "sentence_nr": 2 + "score": 0.2517564097762976, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.3583012077811813, - "sentence_nr": 2 + "score": 0.5081933712560073, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.20149292377213346, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.2167522404231326, - "sentence_nr": 2 + "score": 0.46775181788199904, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.2307406223440206, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.5788151459845872, - "sentence_nr": 2 + "score": 0.2955812558020175, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.3476349310417924, - "sentence_nr": 2 + "score": 0.2401592192636423, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.09019417034682699, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.28822910320599077, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.41141702892520243, - "sentence_nr": 2 + "score": 0.6087031937056202, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3880515884750121, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.011825748052502034, - "sentence_nr": 2 + "score": 0.6587916715823183, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5142726846179982, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.3840028089763543, - "sentence_nr": 2 + "score": 0.7344716263345912, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.16421603133867055, - "sentence_nr": 2 + "score": 0.4094748015187699, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.29137424728903016, - "sentence_nr": 2 + "score": 0.4288513205758089, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.20812209921683228, - "sentence_nr": 2 + "score": 0.4487746167679644, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.31687414190905666, - "sentence_nr": 2 + "score": 0.4476730201191672, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.16498223460029865, - "sentence_nr": 2 + "score": 0.2836623400057614, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.2972734576062982, - "sentence_nr": 2 + "score": 0.29147337237183046, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.22705489693606415, - "sentence_nr": 2 + "score": 0.16950698451288215, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.39286999195229216, - "sentence_nr": 2 + "score": 0.48668984177868246, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.18893041617782175, - "sentence_nr": 2 + "score": 0.2113054108348111, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.32203554666172596, - "sentence_nr": 2 + "score": 0.44238229987470284, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.16421603133867055, - "sentence_nr": 2 + "score": 0.26207903587847736, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.2938925751927021, - "sentence_nr": 2 + "score": 0.50073123223194, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.1876442538016413, - "sentence_nr": 2 + "score": 0.4527112325797497, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.3540256720775971, - "sentence_nr": 2 + "score": 0.6708989870027865, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.19591247075997567, - "sentence_nr": 2 + "score": 0.4556160153884204, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.32778005458140924, - "sentence_nr": 2 + "score": 0.6661994452325181, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.18637067743823652, - "sentence_nr": 2 + "score": 0.3216756020053242, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.2985651743860094, - "sentence_nr": 2 + "score": 0.6141241026166391, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.2529258575884984, - "sentence_nr": 2 + "score": 0.3977038258772401, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.37189904894232945, - "sentence_nr": 2 + "score": 0.6202897864314184, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.16421603133867055, - "sentence_nr": 2 + "score": 0.37570809340937233, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.29137424728903016, - "sentence_nr": 2 + "score": 0.6339141734561076, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.16421603133867055, - "sentence_nr": 2 + "score": 0.559332422592187, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.29137424728903016, - "sentence_nr": 2 + "score": 0.733291190094771, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.274614810062371, - "sentence_nr": 2 + "score": 0.3167585643537871, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.5281783547748619, - "sentence_nr": 2 + "score": 0.5076869840147092, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.18452698284010527, - "sentence_nr": 2 + "score": 0.3446592076818278, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.37129342404244153, - "sentence_nr": 2 + "score": 0.5819912583909785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.1649362336939456, - "sentence_nr": 2 + "score": 0.23270938096152352, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.36964185672093963, - "sentence_nr": 2 + "score": 0.4490269267329941, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.13727969222453051, - "sentence_nr": 2 + "score": 0.44114781827798216, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.29409236920754495, - "sentence_nr": 2 + "score": 0.6241365710582877, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.09261843076782389, - "sentence_nr": 2 + "score": 0.529527758323629, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.3406703374268109, - "sentence_nr": 2 + "score": 0.6540432510655854, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.02874370235825497, - "sentence_nr": 2 + "score": 0.49704232910799745, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.09875426372916535, - "sentence_nr": 2 + "score": 0.6453248294274054, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.44770953300438343, - "sentence_nr": 2 + "score": 0.3542266508664836, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.6419549710447301, - "sentence_nr": 2 + "score": 0.5643413028542406, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.18523686153564775, - "sentence_nr": 2 + "score": 0.3479698393875884, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3537843522005248, - "sentence_nr": 2 + "score": 0.5760833125751785, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.24373253714463095, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.49482039214573803, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.113156606711698, - "sentence_nr": 2 + "score": 0.3995439803178399, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.43802319023845615, - "sentence_nr": 2 + "score": 0.6021193793256325, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.13466364265367983, - "sentence_nr": 2 + "score": 0.2988697040013311, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.29553023036585113, - "sentence_nr": 2 + "score": 0.5442522660489195, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.08468606881585687, - "sentence_nr": 2 + "score": 0.500703635659656, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.2488373778948128, - "sentence_nr": 2 + "score": 0.6501904887399698, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.34396207830145586, - "sentence_nr": 2 + "score": 0.2993081268625724, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.5775887851128505, - "sentence_nr": 2 + "score": 0.47777429598730525, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.30371045098471633, - "sentence_nr": 2 + "score": 0.43330223254789785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.6140790369362206, - "sentence_nr": 2 + "score": 0.5564499529933307, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.2252297536658673, - "sentence_nr": 2 + "score": 0.1466607445607986, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.437729946490623, - "sentence_nr": 2 + "score": 0.36552963821230766, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.17546787062208544, - "sentence_nr": 2 + "score": 0.35312894221988256, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.40827921653488547, - "sentence_nr": 2 + "score": 0.5483853808672988, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.26019126665731623, - "sentence_nr": 2 + "score": 0.41807822202441103, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.535666221551461, - "sentence_nr": 2 + "score": 0.577545891208518, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.10690275145666722, - "sentence_nr": 2 + "score": 0.3475258894340562, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.16763819765042876, - "sentence_nr": 2 + "score": 0.5729813197277963, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.33047557311918846, - "sentence_nr": 2 + "score": 0.3473313422920779, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.6134378350119151, - "sentence_nr": 2 + "score": 0.6495220842154038, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.2469704880271774, - "sentence_nr": 2 + "score": 0.20238796310390209, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5214737751636569, - "sentence_nr": 2 + "score": 0.5665467522687606, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.1839576031944879, - "sentence_nr": 2 + "score": 0.4612469192468151, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.4041591260008859, - "sentence_nr": 2 + "score": 0.689540484203802, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.2744333720270393, - "sentence_nr": 2 + "score": 0.3319181496626261, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.545433501864422, - "sentence_nr": 2 + "score": 0.6296213700542458, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.1839576031944879, - "sentence_nr": 2 + "score": 0.45718638941364104, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.2608764081982116, - "sentence_nr": 2 + "score": 0.6765316874457515, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.23954744997508795, - "sentence_nr": 2 + "score": 0.2842437601270078, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5282078787711029, - "sentence_nr": 2 + "score": 0.616554183160495, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.3004961314114194, - "sentence_nr": 2 + "score": 0.5145500336945869, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.6112720381807045, - "sentence_nr": 2 + "score": 0.6710712793486331, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.2668575997365348, - "sentence_nr": 2 + "score": 0.6260866791475674, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5174669930427155, - "sentence_nr": 2 + "score": 0.7122695616091047, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.25289636204048427, - "sentence_nr": 2 + "score": 0.3856140451435003, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.55030569340461, - "sentence_nr": 2 + "score": 0.5942230347389459, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.2997527757973927, - "sentence_nr": 2 + "score": 0.29427156769985635, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.5791864392203819, - "sentence_nr": 2 + "score": 0.38674923884011136, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.2583948662751404, - "sentence_nr": 2 + "score": 0.025127088788317715, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.5105420923907518, - "sentence_nr": 2 + "score": 0.21031980892802613, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.1603931733528484, - "sentence_nr": 2 + "score": 0.14721260533033206, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.37904065806088205, - "sentence_nr": 2 + "score": 0.2721389124032325, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.4177104476436896, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.7086018074577748, - "sentence_nr": 2 + "score": 0.4224062856802975, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.22647299841062532, - "sentence_nr": 2 + "score": 0.23333094197299464, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.5653420376166065, - "sentence_nr": 2 + "score": 0.49932033002402926, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.08677705107533369, - "sentence_nr": 2 + "score": 0.11159818222678333, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.19558575283017382, - "sentence_nr": 2 + "score": 0.4729286927154353, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.34749088141991274, - "sentence_nr": 2 + "score": 0.3095017641836787, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.6197290698554747, - "sentence_nr": 2 + "score": 0.5048955319030237, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.1034094067304739, - "sentence_nr": 2 + "score": 0.4060492501139825, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.24170749955870371, - "sentence_nr": 2 + "score": 0.5683580956250115, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.20580697296080874, - "sentence_nr": 2 + "score": 0.24406315755131383, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.5235721554417833, - "sentence_nr": 2 + "score": 0.4895502947549771, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.2247327109713433, - "sentence_nr": 2 + "score": 0.3534880761230573, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.5250140675378029, - "sentence_nr": 2 + "score": 0.631198012145244, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.3378883984281531, - "sentence_nr": 2 + "score": 0.07640690432316208, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.6049772225333672, - "sentence_nr": 2 + "score": 0.4641021856152639, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.2761659300730445, - "sentence_nr": 2 + "score": 0.16305626357596484, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.5565926641426052, - "sentence_nr": 2 + "score": 0.4931017709619054, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.34830115722228644, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.5888195275254285, - "sentence_nr": 2 + "score": 0.1806974226541495, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.36581287441608196, - "sentence_nr": 2 + "score": 0.4264281995893693, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.5826367630755845, - "sentence_nr": 2 + "score": 0.5686715593689737, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.24624087743244766, - "sentence_nr": 2 + "score": 0.3755133068349505, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.5070519477529656, - "sentence_nr": 2 + "score": 0.5350958966397039, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.38749284922692695, - "sentence_nr": 2 + "score": 0.23876161112129105, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.6516567627893857, - "sentence_nr": 2 + "score": 0.5060111681210002, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.2274466311435254, - "sentence_nr": 2 + "score": 0.48773160239847224, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.48138629187483895, - "sentence_nr": 2 + "score": 0.6806798635651312, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.23709755163544347, - "sentence_nr": 2 + "score": 0.30528565543580843, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.4710922506827035, - "sentence_nr": 2 + "score": 0.5318320483880432, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.3298078055627824, - "sentence_nr": 2 + "score": 0.33552713250034905, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.553504716597481, - "sentence_nr": 2 + "score": 0.5622995574132048, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.006420462868175973, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.06924548146482756, - "sentence_nr": 2 + "score": 0.09056338921321325, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.22571464820591175, - "sentence_nr": 2 + "score": 0.35831291876413535, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5194003326020868, - "sentence_nr": 2 + "score": 0.5383434353225599, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.13384453331197527, - "sentence_nr": 2 + "score": 0.29353984288388507, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.4151425963129396, - "sentence_nr": 2 + "score": 0.695138133719427, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.2276261087372084, - "sentence_nr": 2 + "score": 0.36925904697255574, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.5006338961901005, - "sentence_nr": 2 + "score": 0.7332676622154629, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.20109176688134525, - "sentence_nr": 2 + "score": 0.22701436815032078, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.5208655725098277, - "sentence_nr": 2 + "score": 0.6596565788920288, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.2473562600048627, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.5076737995930731, - "sentence_nr": 2 + "score": 0.1552450511464302, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.19726472415983368, - "sentence_nr": 2 + "score": 0.18842393723950338, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.5265681085035203, - "sentence_nr": 2 + "score": 0.242345930892648, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.05469182036071644, - "sentence_nr": 2 + "score": 0.14033475286594138, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.14134611705275643, - "sentence_nr": 2 + "score": 0.2185459072776493, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.29165949127069796, - "sentence_nr": 2 + "score": 0.21966878190344116, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.6416400462287064, - "sentence_nr": 2 + "score": 0.28531911189599546, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.1822418298126852, - "sentence_nr": 2 + "score": 0.21475571940851748, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.47651743090462295, - "sentence_nr": 2 + "score": 0.23332152661609917, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.19089930432372385, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.25962807098290386, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.28074631028535585, - "sentence_nr": 2 + "score": 0.19068201647342703, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6108353273226604, - "sentence_nr": 2 + "score": 0.47264292072872943, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.10369816700638204, - "sentence_nr": 2 + "score": 0.24780611716850762, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.22867526454708295, - "sentence_nr": 2 + "score": 0.47134154774689047, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.1288644336763944, - "sentence_nr": 2 + "score": 0.22071227044983457, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.4506387870225218, - "sentence_nr": 2 + "score": 0.45625358803874827, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.3003653956261136, - "sentence_nr": 2 + "score": 0.3786316249953693, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.5819235916814075, - "sentence_nr": 2 + "score": 0.6635410443202763, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.34009641866679796, - "sentence_nr": 2 + "score": 0.37051721129462284, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.6387903483458015, - "sentence_nr": 2 + "score": 0.6498104569938186, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.28509173779340485, - "sentence_nr": 2 + "score": 0.2580007063666063, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.5531716447251654, - "sentence_nr": 2 + "score": 0.5822544210906975, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.32701836499065495, - "sentence_nr": 2 + "score": 0.3384715911287291, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.5887530871636928, - "sentence_nr": 2 + "score": 0.5853748388768727, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.3215446470225238, - "sentence_nr": 2 + "score": 0.10361823626504964, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.6044362626458115, - "sentence_nr": 2 + "score": 0.4310581309821413, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.1621452786919851, - "sentence_nr": 2 + "score": 0.2230178035253426, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.4266548530258764, - "sentence_nr": 2 + "score": 0.5101738085806462, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.39305243686884617, - "sentence_nr": 2 + "score": 0.09986058288520257, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.6442143025528515, - "sentence_nr": 2 + "score": 0.36871570224848776, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.21388271661657618, - "sentence_nr": 2 + "score": 0.36794933674561564, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.5554687912903771, - "sentence_nr": 2 + "score": 0.5158310722885024, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.11504003871888992, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.34098902843025203, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.449156109308219, - "sentence_nr": 2 + "score": 0.36601964108639373, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.6968694846040075, - "sentence_nr": 2 + "score": 0.5845337385286451, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.40551649928110445, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5930648846584049, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.30587770809762665, - "sentence_nr": 2 + "score": 0.26393609309752497, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.564673691651119, - "sentence_nr": 2 + "score": 0.537555561645112, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.14651860136741404, - "sentence_nr": 2 + "score": 0.08722778492512923, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.26874220962782625, - "sentence_nr": 2 + "score": 0.3364641574903517, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.3014199920541698, - "sentence_nr": 2 + "score": 0.17929000882953408, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.37258990587027996, - "sentence_nr": 2 + "score": 0.4496535426738611, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.2169400845409205, - "sentence_nr": 2 + "score": 0.16236014331815277, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.352650085718584, - "sentence_nr": 2 + "score": 0.48989515178016146, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.1539430723824455, - "sentence_nr": 2 + "score": 0.07838231858034365, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.26414511109897976, - "sentence_nr": 2 + "score": 0.3561023397717499, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.12816377064174464, - "sentence_nr": 2 + "score": 0.12507294238386796, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.2946511250628232, - "sentence_nr": 2 + "score": 0.4138228238855707, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.14925845270770738, - "sentence_nr": 2 + "score": 0.09662080155533485, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.29215719061835377, - "sentence_nr": 2 + "score": 0.4004063786929589, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.2819443057280203, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.41453825421149665, - "sentence_nr": 2 + "score": 0.30952716928360685, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.22153003594990717, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.3828811613558722, - "sentence_nr": 2 + "score": 0.31930298094188314, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.18843423878971213, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.406619208342576, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.23313203626084417, - "sentence_nr": 2 + "score": 0.37246426676247424, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.3985003392586837, - "sentence_nr": 2 + "score": 0.5209584150532849, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.14651860136741404, - "sentence_nr": 2 + "score": 0.28599643357892673, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.26874220962782625, - "sentence_nr": 2 + "score": 0.5040359554493156, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.14651860136741404, - "sentence_nr": 2 + "score": 0.31290644634463044, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.26874220962782625, - "sentence_nr": 2 + "score": 0.48183974715675615, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.280867833557141, - "sentence_nr": 2 + "score": 0.15984232622995367, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.47682234542802715, - "sentence_nr": 2 + "score": 0.47596521065582126, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.3195214890612964, - "sentence_nr": 2 + "score": 0.19598591795373435, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.6238377764870237, - "sentence_nr": 2 + "score": 0.4825413460017427, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.2026004770366011, - "sentence_nr": 2 + "score": 0.09490574585599613, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.4124307729296919, - "sentence_nr": 2 + "score": 0.38652552310013205, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.13123243740715776, - "sentence_nr": 2 + "score": 0.1536201950485081, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.42907981810256635, - "sentence_nr": 2 + "score": 0.39433169171016624, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.36127411811309323, - "sentence_nr": 2 + "score": 0.3033134628026753, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.6437771669410485, - "sentence_nr": 2 + "score": 0.4755698162289715, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.022435099089467586, - "sentence_nr": 2 + "score": 0.012770470304307417, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.0629963528501866, - "sentence_nr": 2 + "score": 0.107571889368401, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.5992890926074543, - "sentence_nr": 2 + "score": 0.46712834333557585, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.8208010526832126, - "sentence_nr": 2 + "score": 0.7756124961868583, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.27856409500832724, - "sentence_nr": 2 + "score": 0.5425073523683781, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.5620060117576954, - "sentence_nr": 2 + "score": 0.7380948085432402, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.2026004770366011, - "sentence_nr": 2 + "score": 0.5566924420805306, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.3493390600432761, - "sentence_nr": 2 + "score": 0.7485245832644088, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.4216050739426583, - "sentence_nr": 2 + "score": 0.10455448325640569, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.6298635247340831, - "sentence_nr": 2 + "score": 0.4452517051823117, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.15987934837604795, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.06581314846603122, - "sentence_nr": 2 + "score": 0.5371445877689911, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.175300149784418, - "sentence_nr": 2 + "score": 0.20700927851455897, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.32397368477662136, - "sentence_nr": 2 + "score": 0.5234104249604495, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.3479857106948536, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.5434098077482219, - "sentence_nr": 2 + "score": 0.04898848695928882, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.127094130129695, - "sentence_nr": 2 + "score": 0.05938702344685413, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.37183060884198066, - "sentence_nr": 2 + "score": 0.2356799309939399, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.1616475408517619, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.2842078929375233, - "sentence_nr": 2 + "score": 0.11733391315716854, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.1616475408517619, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.2876758205224393, - "sentence_nr": 2 + "score": 0.3665401854549857, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.16055739172356015, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.38788396342269943, - "sentence_nr": 2 + "score": 0.3323242589046854, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.08278476463888747, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.2722654377129244, - "sentence_nr": 2 + "score": 0.3011326009425157, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.438372977246803, - "sentence_nr": 2 + "score": 0.07363400452542229, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.6331196317735631, - "sentence_nr": 2 + "score": 0.3008582090976646, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.29592551586707205, - "sentence_nr": 2 + "score": 0.08713070321109329, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4766320516060901, - "sentence_nr": 2 + "score": 0.26495668798540345, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.1964351708221123, - "sentence_nr": 2 + "score": 0.10709296666230438, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.3340673420939409, - "sentence_nr": 2 + "score": 0.27051785140882895, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.06861077177982006, - "sentence_nr": 2 + "score": 0.3564265023508987, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.2923752795770079, - "sentence_nr": 2 + "score": 0.5720558552741405, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.13914825184539845, - "sentence_nr": 2 + "score": 0.3168766888730919, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.28558864930678796, - "sentence_nr": 2 + "score": 0.5556993363671432, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.14161200935137283, - "sentence_nr": 2 + "score": 0.1914056871173395, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.28747434511519493, - "sentence_nr": 2 + "score": 0.49658804680546015, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.22839293770911745, - "sentence_nr": 2 + "score": 0.20777820949105405, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.4657355446143013, - "sentence_nr": 2 + "score": 0.49320753055615735, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.1709913567536511, - "sentence_nr": 2 + "score": 0.17261709633699074, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.29449196775233905, - "sentence_nr": 2 + "score": 0.4066847602885835, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.13435637642994447, - "sentence_nr": 2 + "score": 0.17293669227382233, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.34277719024611025, - "sentence_nr": 2 + "score": 0.43566853469949024, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.2164949874511416, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.37103544427871854, - "sentence_nr": 2 + "score": 0.46356345415255434, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.2177197358110709, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.3769919943574011, - "sentence_nr": 2 + "score": 0.45763199706851787, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.1873975519857385, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.29483678830580506, - "sentence_nr": 2 + "score": 0.4006866238149517, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.2983904559856352, - "sentence_nr": 2 + "score": 0.3874784036355021, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.5687663930843418, - "sentence_nr": 2 + "score": 0.6440087418049909, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.19109543352736386, - "sentence_nr": 2 + "score": 0.3524104819165935, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.4581747073637954, - "sentence_nr": 2 + "score": 0.6432066906973225, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.010133180370259504, - "sentence_nr": 2 + "score": 0.33071760819179763, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.10549258015880895, - "sentence_nr": 2 + "score": 0.5886340872220965, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0820092565332346, - "sentence_nr": 2 + "score": 0.2115421630907294, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.37421819446145144, - "sentence_nr": 2 + "score": 0.4592142318550694, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.1791552038222414, - "sentence_nr": 2 + "score": 0.1548358410564168, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.2774929236779122, - "sentence_nr": 2 + "score": 0.47381156956039844, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.09521360830382836, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.28777216726322846, - "sentence_nr": 2 + "score": 0.41566998270167405, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.19606965736186524, - "sentence_nr": 2 + "score": 0.20100993861167848, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.5029030066686957, - "sentence_nr": 2 + "score": 0.42816528421673367, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.19598322445625943, - "sentence_nr": 2 + "score": 0.36530627395229537, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.46665751191230503, - "sentence_nr": 2 + "score": 0.5407875609358465, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.21170876705481304, - "sentence_nr": 2 + "score": 0.3452240924044895, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.5213888058464138, - "sentence_nr": 2 + "score": 0.5535093309459415, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.24268972717185816, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5576510639586775, - "sentence_nr": 2 + "score": 0.15816202023379808, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.25500119387217685, - "sentence_nr": 2 + "score": 0.06694383876384004, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5473994521063271, - "sentence_nr": 2 + "score": 0.33483358842355604, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.3536429640513799, - "sentence_nr": 2 + "score": 0.15027544246364394, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.3148511129075859, - "sentence_nr": 2 + "score": 0.11978799774523341, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.6156857190680921, - "sentence_nr": 2 + "score": 0.35375346332533086, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.11884101417355644, - "sentence_nr": 2 + "score": 0.13288189621656632, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.47174335977774734, - "sentence_nr": 2 + "score": 0.39376971391564713, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.08351204957008757, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.3438749143478485, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.2828696562913071, - "sentence_nr": 2 + "score": 0.42093878874048907, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.5815271722252192, - "sentence_nr": 2 + "score": 0.6473313168878321, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.300766546028367, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.01051238006063824, - "sentence_nr": 2 + "score": 0.5375347099200066, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.17812705640334517, - "sentence_nr": 2 + "score": 0.2429883753005781, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.5084809568961394, - "sentence_nr": 2 + "score": 0.5500942714401033, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.2104347389999275, - "sentence_nr": 2 + "score": 0.2320571360424813, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.41820208790180724, - "sentence_nr": 2 + "score": 0.4518734332878898, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.33893526679717595, - "sentence_nr": 2 + "score": 0.08435153871730829, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.5305909471293387, - "sentence_nr": 2 + "score": 0.3396241652752055, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.4005296397635166, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.5780131186067837, - "sentence_nr": 2 + "score": 0.3130397888371956, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.2979260336350717, - "sentence_nr": 2 + "score": 0.19162170741554893, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.5411460905234677, - "sentence_nr": 2 + "score": 0.529718220134276, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.19228093786407296, - "sentence_nr": 2 + "score": 0.19063852279381985, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.4026187336796658, - "sentence_nr": 2 + "score": 0.527809866102274, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.2473562600048627, - "sentence_nr": 2 + "score": 0.06761844202436226, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.3874241899308999, - "sentence_nr": 2 + "score": 0.4092757662956698, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.5043660369058458, - "sentence_nr": 2 + "score": 0.37155720204823606, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.7114685753038728, - "sentence_nr": 2 + "score": 0.6006421195875694, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.2501973138123507, - "sentence_nr": 2 + "score": 0.42409393275664153, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.4364929917146681, - "sentence_nr": 2 + "score": 0.624222744304739, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.4429497126432637, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", + "metric": "chrf", + "score": 0.6440718307281338, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.3520774812078196, - "sentence_nr": 2 + "score": 0.5136648298299584, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.607239668980014, - "sentence_nr": 2 + "score": 0.6450420560705616, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.16467029855845897, - "sentence_nr": 2 + "score": 0.46099008584959905, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.3377501966816411, - "sentence_nr": 2 + "score": 0.6373572880701773, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.23419233116745658, - "sentence_nr": 2 + "score": 0.24825115391503105, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.46742045504786317, - "sentence_nr": 2 + "score": 0.4714738437514274, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.2534684260065973, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.465022490109088, - "sentence_nr": 2 + "score": 0.3582964458954854, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.28341626687166926, - "sentence_nr": 2 + "score": 0.10909451748857432, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.4981912496496188, - "sentence_nr": 2 + "score": 0.37515093102901353, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.18787234368655517, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.43638553308108674, - "sentence_nr": 2 + "score": 0.3268810409004551, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.07967137083817866, - "sentence_nr": 2 + "score": 0.09295079562321831, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.2817820058246867, - "sentence_nr": 2 + "score": 0.34229046446273115, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.09044734140134039, - "sentence_nr": 2 + "score": 0.28655904581184494, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.318967366104251, - "sentence_nr": 2 + "score": 0.4725508462620691, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.005135910808249442, - "sentence_nr": 2 + "score": 0.0747524183542725, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.03203003524868126, - "sentence_nr": 2 + "score": 0.24565798005663728, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.31861382388380677, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.521868055391611, - "sentence_nr": 2 + "score": 0.41678582570423633, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.2002683261487131, - "sentence_nr": 2 + "score": 0.08897307561927967, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.3934832974774716, - "sentence_nr": 2 + "score": 0.3561293213533616, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.29668241011022123, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.056164803850745015, - "sentence_nr": 2 + "score": 0.07592692216583019, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.3268244770409936, - "sentence_nr": 2 + "score": 0.330799461944069, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.08036783502153183, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.28648312939078924, - "sentence_nr": 2 + "score": 0.4246277236041624, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.08098869931579498, - "sentence_nr": 2 + "score": 0.06078826337706973, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.2826687235563463, - "sentence_nr": 2 + "score": 0.3140221984691054, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.19035778476657214, - "sentence_nr": 2 + "score": 0.21838690739485656, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.3275220698724237, - "sentence_nr": 2 + "score": 0.44195361269760747, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.2139885278593109, - "sentence_nr": 2 + "score": 0.1806817845840967, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.5273959990464491, - "sentence_nr": 2 + "score": 0.4245252650125705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.16558141211628247, - "sentence_nr": 2 + "score": 0.08946803054115307, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.37532912975144084, - "sentence_nr": 2 + "score": 0.3915771574810612, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.06356475632281808, - "sentence_nr": 2 + "score": 0.4801015157149487, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.3009759906152119, - "sentence_nr": 2 + "score": 0.6272559830443027, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.14227980900528805, - "sentence_nr": 2 + "score": 0.5121292781944586, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.3453010483553648, - "sentence_nr": 2 + "score": 0.6535353826694096, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.15525616702869105, - "sentence_nr": 2 + "score": 0.2762600514536916, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.31389802556639906, - "sentence_nr": 2 + "score": 0.5268462708447845, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.2788221557440545, - "sentence_nr": 2 + "score": 0.14338573762564966, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.6008304689237989, - "sentence_nr": 2 + "score": 0.3531021816263784, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.16846216892989907, - "sentence_nr": 2 + "score": 0.2086384272947034, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.3801074056305939, - "sentence_nr": 2 + "score": 0.41111566649227, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.15332513333273987, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.2932376272598407, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.1893886712648118, - "sentence_nr": 2 + "score": 0.04594302966654298, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.38509762225132554, - "sentence_nr": 2 + "score": 0.24680538829271628, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.005709385735849358, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.03522717964962578, - "sentence_nr": 2 + "score": 0.2706849406099641, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.18236198178601878, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.33077619366118716, - "sentence_nr": 2 + "score": 0.2711396488064229, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.11012419619306524, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.4937984099457621, - "sentence_nr": 2 + "score": 0.45696598587083365, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.14599223028360678, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.4828499846637324, - "sentence_nr": 2 + "score": 0.287111534154844, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.12579787892324615, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.46133126472684716, - "sentence_nr": 2 + "score": 0.2951873860944009, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.13043787072509858, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.463417247785631, - "sentence_nr": 2 + "score": 0.045411074399305515, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.1235463341630649, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.4614840691320092, - "sentence_nr": 2 + "score": 0.19534610772698124, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.055560319570139106, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.3060391771300899, - "sentence_nr": 2 + "score": 0.07220969642779033, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.2744333720270393, - "sentence_nr": 2 + "score": 0.12878480872416886, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.6189610313789276, - "sentence_nr": 2 + "score": 0.4656205040544615, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.10547213336960157, - "sentence_nr": 2 + "score": 0.28519788815271513, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.4536459955429135, - "sentence_nr": 2 + "score": 0.5802455455360611, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.35768733769481764, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.13727159408550668, - "sentence_nr": 2 + "score": 0.28338350923662375, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5081399318838673, - "sentence_nr": 2 + "score": 0.49566471724911376, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.06449817351457392, - "sentence_nr": 2 + "score": 0.29213605340001886, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.3424747506665667, - "sentence_nr": 2 + "score": 0.5193184356046634, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.18837227000249876, - "sentence_nr": 2 + "score": 0.138833231672613, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.4972510079060122, - "sentence_nr": 2 + "score": 0.47061483468926485, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.1281637706417447, - "sentence_nr": 2 + "score": 0.36725466212785407, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.22833109825855033, - "sentence_nr": 2 + "score": 0.5378300699928413, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.25500119387217685, - "sentence_nr": 2 + "score": 0.4826464770670779, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.42217126960650364, - "sentence_nr": 2 + "score": 0.6103311193264034, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.29264105234089743, - "sentence_nr": 2 + "score": 0.3226429253246242, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.36844739850003594, - "sentence_nr": 2 + "score": 0.4558280044517862, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.15829860807960125, - "sentence_nr": 2 + "score": 0.4124513407273539, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.28885600341606654, - "sentence_nr": 2 + "score": 0.6927292478929317, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.1309822409175701, - "sentence_nr": 2 + "score": 0.4270577041161737, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.24657461387159663, - "sentence_nr": 2 + "score": 0.6772877372145952, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.03320059139020178, - "sentence_nr": 2 + "score": 0.46750299425177155, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.07438646667723649, - "sentence_nr": 2 + "score": 0.6912570546054524, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.47239214217316433, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.1125619471249954, - "sentence_nr": 2 + "score": 0.6826781717152027, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.20521339316349824, - "sentence_nr": 2 + "score": 0.4380189650991404, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.31921572308551066, - "sentence_nr": 2 + "score": 0.6285301422522439, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.2858251502733532, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5313749753656399, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.05173267766588821, - "sentence_nr": 2 + "score": 0.4854148148590684, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.2600178198215753, - "sentence_nr": 2 + "score": 0.5985163152295152, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.125405948089153, - "sentence_nr": 2 + "score": 0.5647041916493775, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.2272526814684417, - "sentence_nr": 2 + "score": 0.6502234807392684, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.1397775515541073, - "sentence_nr": 2 + "score": 0.35839174689995523, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.2591367637695346, - "sentence_nr": 2 + "score": 0.5156712685166046, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 + "score": 0.09949076703115871, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.1658343254577952, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.7882997401328445, - "sentence_nr": 3 + "score": 0.5368289150607153, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.2137951235585419, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.31565930043784707, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.4855332614117322, - "sentence_nr": 3 + "score": 0.2564152992831025, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.5299556742893647, - "sentence_nr": 3 + "score": 0.5734154889930266, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.06367523345131179, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.5103516764863386, - "sentence_nr": 3 + "score": 0.40417603881741776, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.3088457262825233, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.554610859906414, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.11823377429398636, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.4865071573440099, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.362911439724043, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.6960917409740967, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.8209757784637755, - "sentence_nr": 3 + "score": 0.05179532920145171, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.14790264259417688, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.27159767590045303, - "sentence_nr": 3 + "score": 0.252263377361689, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.6849386986272349, - "sentence_nr": 3 + "score": 0.23397944996002637, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 + "score": 0.909878624371155, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 + "score": 0.9494599978334789, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.25552199116069907, - "sentence_nr": 3 + "score": 0.6484538568755306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.3799133205289109, - "sentence_nr": 3 + "score": 0.8387015535622947, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.9625248317849852, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 + "score": 0.9799603794887166, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 + "score": 0.18690562063516822, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.22090491782919655, - "sentence_nr": 3 + "score": 0.4977106916309785, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 3 + "score": 0.1339727584572257, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.280413108453108, - "sentence_nr": 3 + "score": 0.5319216512574707, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.139800134566647, - "sentence_nr": 3 + "score": 0.07251137791160432, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.2510112235832054, - "sentence_nr": 3 + "score": 0.4703241600605098, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.40214612768560637, - "sentence_nr": 3 + "score": 0.08404094012985504, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.45128424593135114, - "sentence_nr": 3 + "score": 0.4372015242865747, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0925329498915617, - "sentence_nr": 3 + "score": 0.14636085397535975, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.2110486160692096, - "sentence_nr": 3 + "score": 0.4526469182638613, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3296972886859299, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.41654484827391225, - "sentence_nr": 3 + "score": 0.06321303909933033, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.12453389344594705, - "sentence_nr": 3 + "score": 0.14252585230727824, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.141543757252386, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.2594145364221844, - "sentence_nr": 3 + "score": 0.09976480990835405, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 3 + "score": 0.12457006790652007, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.6181373706707737, - "sentence_nr": 3 + "score": 0.3654990157692545, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.2128456505982782, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.512762518189388, - "sentence_nr": 3 + "score": 0.43481131669667605, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.1478211228617202, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.40026283527466844, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.581972638479957, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.6970914528585833, - "sentence_nr": 3 + "score": 0.1978878664885206, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.44120063733294235, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.5296624608564717, - "sentence_nr": 3 + "score": 0.2571422518248478, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.4440750605884706, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.5402588602256685, - "sentence_nr": 3 + "score": 0.14611434422750874, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.6837528314895732, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.7968789890147058, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.6458552885189878, - "sentence_nr": 3 + "score": 0.7017829861193574, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.7468283944111381, - "sentence_nr": 3 + "score": 0.7743327021667388, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.4272870063962341, - "sentence_nr": 3 + "score": 0.6961795371760597, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.5170917334956868, - "sentence_nr": 3 + "score": 0.7859480663394858, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.43310177167002284, - "sentence_nr": 3 + "score": 0.2360941227140328, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.534533410927948, - "sentence_nr": 3 + "score": 0.35939098278145853, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.14118350058219528, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.26481979271706185, - "sentence_nr": 3 + "score": 0.20431837779877604, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.3212854967972961, - "sentence_nr": 3 + "score": 0.1811004938014804, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.47171327621770304, - "sentence_nr": 3 + "score": 0.2649993136544717, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.23649053182388327, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.35447530946908884, - "sentence_nr": 3 + "score": 0.4127382174759535, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.4577275269488853, - "sentence_nr": 3 + "score": 0.20721924345714232, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.6747054474171109, - "sentence_nr": 3 + "score": 0.36475932190367044, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 + "score": 0.11386607947762988, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 + "score": 0.33564583347921473, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.3508739523842563, - "sentence_nr": 3 + "score": 0.570135897056151, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.5533976153694653, - "sentence_nr": 3 + "score": 0.6801332690579707, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.23705266435224473, - "sentence_nr": 3 + "score": 0.46442643702863534, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.44716007458096513, - "sentence_nr": 3 + "score": 0.5519480629125156, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.25530635525095574, - "sentence_nr": 3 + "score": 0.6268941789647348, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.4224404198283467, - "sentence_nr": 3 + "score": 0.6958291103494518, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.37762270401758113, - "sentence_nr": 3 + "score": 0.21305368975019265, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5372679696382219, - "sentence_nr": 3 + "score": 0.4371748197696026, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.44392090655418587, - "sentence_nr": 3 + "score": 0.22837680015088951, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5678926447384061, - "sentence_nr": 3 + "score": 0.44164180234500505, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.4151474543103342, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.638952468710771, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.40891568776497583, - "sentence_nr": 3 + "score": 0.20876900081884944, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.46522329223142805, - "sentence_nr": 3 + "score": 0.3981381071356935, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.11436433361427001, - "sentence_nr": 3 + "score": 0.11634129390828839, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.23221971735799607, - "sentence_nr": 3 + "score": 0.31530902302000635, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.18580985894574314, - "sentence_nr": 3 + "score": 0.19544795798162903, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.3347249292100999, - "sentence_nr": 3 + "score": 0.3835451743665027, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.15138514598766048, - "sentence_nr": 3 + "score": 0.37917766663411384, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.3237497764315872, - "sentence_nr": 3 + "score": 0.5365794450039074, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.27668736912821895, - "sentence_nr": 3 + "score": 0.359355103997122, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.4414406760568898, - "sentence_nr": 3 + "score": 0.5589602235417395, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 + "score": 0.4267520229161, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 + "score": 0.5518115366540288, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.2465659486053858, - "sentence_nr": 3 + "score": 0.3221305290185444, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.5689069160047179, - "sentence_nr": 3 + "score": 0.4866081657424789, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.35551034193127495, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.30391153783979835, - "sentence_nr": 3 + "score": 0.5627284645723449, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.1544458227548897, - "sentence_nr": 3 + "score": 0.5039752490702457, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3343587266874694, - "sentence_nr": 3 + "score": 0.613669501327356, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.3627848276110141, - "sentence_nr": 3 + "score": 0.6260375038358343, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5349346532576155, - "sentence_nr": 3 + "score": 0.7803415401430737, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.23817261442630488, - "sentence_nr": 3 + "score": 0.5088535943352446, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.448286611717823, - "sentence_nr": 3 + "score": 0.625202596789752, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.562048819850726, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.7192054483864224, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.20679845323803403, - "sentence_nr": 3 + "score": 0.3146726146646545, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.47636494608150104, - "sentence_nr": 3 + "score": 0.4709531555683, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3941975148525721, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5191046479503385, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.09147827112247602, - "sentence_nr": 3 + "score": 0.262633940062176, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.3258762519783793, - "sentence_nr": 3 + "score": 0.41923206553744197, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.15604242268653643, - "sentence_nr": 3 + "score": 0.44898438516407524, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.2255928425212252, - "sentence_nr": 3 + "score": 0.6143783254714975, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.2618161850312308, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.6159319815107203, - "sentence_nr": 3 + "score": 0.46946589430056646, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.4395347891601966, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 + "score": 0.6124294442602769, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.4850978822371748, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.2391308148553106, - "sentence_nr": 3 + "score": 0.6352541213631081, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.18180608220159192, - "sentence_nr": 3 + "score": 0.13139413594401378, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.27307753334479423, - "sentence_nr": 3 + "score": 0.2490406851204271, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.14965975078050625, - "sentence_nr": 3 + "score": 0.3112317271723676, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.22213502776474325, - "sentence_nr": 3 + "score": 0.4361597730424806, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.49612267717096975, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5021029088235913, - "sentence_nr": 3 + "score": 0.6218353723304708, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.3160946016179871, - "sentence_nr": 3 + "score": 0.39997687282627975, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.407876439044591, - "sentence_nr": 3 + "score": 0.5906362815628093, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5208833700498166, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.025108530586642898, - "sentence_nr": 3 + "score": 0.6368157603637512, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.17466240109087192, - "sentence_nr": 3 + "score": 0.3501847839621347, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.2719194508460068, - "sentence_nr": 3 + "score": 0.5180344374850399, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1743988338080954, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.12316365460790003, - "sentence_nr": 3 + "score": 0.4684683280769817, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.3335763231736967, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.35862918415512257, - "sentence_nr": 3 + "score": 0.4822714438205533, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.24728515687112834, - "sentence_nr": 3 + "score": 0.41618377742781326, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.3088155734423375, - "sentence_nr": 3 + "score": 0.5456804815374756, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.30485765641951534, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.78479833664205, - "sentence_nr": 3 + "score": 0.4770499606054267, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 + "score": 0.33469420519942356, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 + "score": 0.40802446160905737, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.2719326877457978, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.4943569700727416, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.3267294026204632, - "sentence_nr": 3 + "score": 0.249036269104499, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.4510525482602028, - "sentence_nr": 3 + "score": 0.42116420214640826, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.46916497710648375, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.27718461611705486, - "sentence_nr": 3 + "score": 0.6210364770630794, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.1513162576311821, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.3665181321185458, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.5365920629514802, - "sentence_nr": 3 + "score": 0.17361047672608262, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.6274039030337838, - "sentence_nr": 3 + "score": 0.39377589303767235, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.12646071698454284, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.35399465579414496, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.4578226095312774, - "sentence_nr": 3 + "score": 0.7096224667917136, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.5406295999835291, - "sentence_nr": 3 + "score": 0.8862932371217843, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5294442646627652, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.002054231717337716, - "sentence_nr": 3 + "score": 0.7281375072835307, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.12286996020967837, - "sentence_nr": 3 + "score": 0.933651069586263, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.31567668741706395, - "sentence_nr": 3 + "score": 0.9586507529693243, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.353203510510529, - "sentence_nr": 3 + "score": 0.5425651337252639, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.4910213297498164, - "sentence_nr": 3 + "score": 0.6573851660329229, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.4815092081725061, - "sentence_nr": 3 + "score": 0.49475425785336474, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.5820265218174012, - "sentence_nr": 3 + "score": 0.5906548177852229, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 + "score": 0.509958011324736, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 + "score": 0.6038542862803142, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.36210097004176117, - "sentence_nr": 3 + "score": 0.18467608126336754, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.408098151133905, - "sentence_nr": 3 + "score": 0.46127619811207604, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.3165014630070639, - "sentence_nr": 3 + "score": 0.29458137881791246, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.42516173623967946, - "sentence_nr": 3 + "score": 0.5492390632020873, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.15820362165931962, - "sentence_nr": 3 + "score": 0.154638418688186, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.2249046365436241, - "sentence_nr": 3 + "score": 0.38427876360308916, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.1379310072046867, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.6874078611406401, - "sentence_nr": 3 + "score": 0.32409635064062775, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.4753167451887016, - "sentence_nr": 3 + "score": 0.3067124925928069, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.6372909532389948, - "sentence_nr": 3 + "score": 0.4473439946126318, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.2822493397640795, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.46507913833761805, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.38317923930200504, - "sentence_nr": 3 + "score": 0.3925406773051543, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.47975624978837655, - "sentence_nr": 3 + "score": 0.5373872822631721, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2753456537058715, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.2054194471318506, - "sentence_nr": 3 + "score": 0.4157637470934354, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.25678404806291744, - "sentence_nr": 3 + "score": 0.3360301095319906, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.37045149029437513, - "sentence_nr": 3 + "score": 0.5015691430360719, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.23660362391696813, - "sentence_nr": 3 + "score": 0.11502119605241674, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.34152697838249696, - "sentence_nr": 3 + "score": 0.2826072681332473, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.1138894686892089, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.23532055968260987, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.08049386277412464, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.21735060795559732, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.26005633892051505, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.32347988292946617, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.1243018504102695, - "sentence_nr": 3 + "score": 0.23882935646817824, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.32950116238735283, - "sentence_nr": 3 + "score": 0.31980378769803924, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.322315347128407, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.5103516764863386, - "sentence_nr": 3 + "score": 0.38629355287842254, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.21675506796946695, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.44636018244046766, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.23183307484609084, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.4860771998642055, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.291308139574652, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.47517712618294367, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.36763082847636347, - "sentence_nr": 3 + "score": 0.3011454888332037, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.45637140510576385, - "sentence_nr": 3 + "score": 0.4262131523304905, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.35933410857228176, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.16935976352352106, - "sentence_nr": 3 + "score": 0.48586232132101626, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.31268514922728713, - "sentence_nr": 3 + "score": 0.2697896652026255, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.41990725085948355, - "sentence_nr": 3 + "score": 0.3998510332125744, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.22269084768108507, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4278034123121377, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.1300236096509615, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.32525153283102953, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.29431947865853453, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.44837466661278663, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.07998273930895511, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.2553179648394861, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.10154990983620787, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3241234761287709, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.07911223895726585, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3284566286505505, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.565361573648609, - "sentence_nr": 3 + "score": 0.3273734953951328, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.47406821010356615, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.8363600587440573, - "sentence_nr": 3 + "score": 0.2875779877950604, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.9912737182609732, - "sentence_nr": 3 + "score": 0.40209911650829044, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.3288944381277255, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.44408070821988965, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.20100292863011363, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4076276304952943, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.6018154975998465, - "sentence_nr": 3 + "score": 0.28997498044330217, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.7669980679050217, - "sentence_nr": 3 + "score": 0.49213353233430807, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.14097078788452733, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.4079828299257035, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.34589895849033114, - "sentence_nr": 3 + "score": 0.1548512533490527, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.44792042673107413, - "sentence_nr": 3 + "score": 0.316352437998455, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.47320724783393625, - "sentence_nr": 3 + "score": 0.23102354810546433, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.5833006006517599, - "sentence_nr": 3 + "score": 0.4512153003070141, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.3556521383601747, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.594830811413066, - "sentence_nr": 3 + "score": 0.24080045866228475, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.5406964703993759, - "sentence_nr": 3 + "score": 0.10763639119133948, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.5964595329953364, - "sentence_nr": 3 + "score": 0.2286324938333245, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.2575863752355164, - "sentence_nr": 3 + "score": 0.1387209968695978, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.3717184743596148, - "sentence_nr": 3 + "score": 0.3380551337195283, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.08197539732074254, - "sentence_nr": 3 + "score": 0.13529314038135454, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.2552663483401067, - "sentence_nr": 3 + "score": 0.22343406175513267, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.43994654743790196, - "sentence_nr": 3 + "score": 0.3366798594244885, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.5758276578902723, - "sentence_nr": 3 + "score": 0.5032290575942354, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.3730786950813075, - "sentence_nr": 3 + "score": 0.3079971615386303, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.47401660085208147, - "sentence_nr": 3 + "score": 0.4820111161799533, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.28695849032593473, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4998009181545032, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.38223593598574, - "sentence_nr": 3 + "score": 0.29614367235735506, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.5729676575997464, - "sentence_nr": 3 + "score": 0.6045301048147214, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3264083012851069, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.19328966457045355, - "sentence_nr": 3 + "score": 0.46032850662611646, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.20477156411200437, - "sentence_nr": 3 + "score": 0.2593432509712634, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.3371728179865314, - "sentence_nr": 3 + "score": 0.4745626718142974, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.19984607356962125, - "sentence_nr": 3 + "score": 0.3199269653277632, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.29326031481052006, - "sentence_nr": 3 + "score": 0.46348573637808615, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.08939270118279458, - "sentence_nr": 3 + "score": 0.0832791679921855, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.2952752522340665, - "sentence_nr": 3 + "score": 0.2503889973495181, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.21629114799587432, - "sentence_nr": 3 + "score": 0.20368338136177952, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.3542320138389837, - "sentence_nr": 3 + "score": 0.3653296593218242, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.21993356630819796, - "sentence_nr": 3 + "score": 0.4424338955424085, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.3822901360655399, - "sentence_nr": 3 + "score": 0.6339970261152106, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.12212865548711085, - "sentence_nr": 3 + "score": 0.4265158106561096, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.27604929504751197, - "sentence_nr": 3 + "score": 0.5640263785205409, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.08939270118279458, - "sentence_nr": 3 + "score": 0.4265158106561096, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.2952752522340665, - "sentence_nr": 3 + "score": 0.5834883274592383, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.12072692160188762, - "sentence_nr": 3 + "score": 0.14115180190142887, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.3162144633910661, - "sentence_nr": 3 + "score": 0.3611264285108106, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.17095864413061523, - "sentence_nr": 3 + "score": 0.17537321780820925, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.2805749649536233, - "sentence_nr": 3 + "score": 0.43171109126508717, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.05304430074030856, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2709892265354633, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.13952118378975725, - "sentence_nr": 3 + "score": 0.18720945362012015, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.2962794525145751, - "sentence_nr": 3 + "score": 0.2715804874226587, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.12356615100322026, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.028735632183908046, - "sentence_nr": 3 + "score": 0.2302608543673073, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.17670087745185423, - "sentence_nr": 3 + "score": 0.0685054000547153, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.3136010782144669, - "sentence_nr": 3 + "score": 0.14139517208631328, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.27274191069381915, - "sentence_nr": 3 + "score": 0.20881279882974757, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.37436438971100644, - "sentence_nr": 3 + "score": 0.4043187259329016, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.11473815526787112, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.36686611148406256, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.10400249861669406, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.24794428614874814, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.06306929577938719, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.1592769733360586, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.17662903260733673, - "sentence_nr": 3 + "score": 0.13977970684956492, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.359573626731952, - "sentence_nr": 3 + "score": 0.4014800837643581, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.526589137558171, - "sentence_nr": 3 + "score": 0.03947461188342995, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.5667866238125795, - "sentence_nr": 3 + "score": 0.14847528747028665, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.20045686131572826, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.40666590434690014, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.20748116469976316, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.7155411017347171, - "sentence_nr": 3 + "score": 0.4063660475668118, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.19221269303152164, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.39590642425729394, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.6960917409740967, - "sentence_nr": 3 + "score": 0.2537557009568589, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.8209757784637755, - "sentence_nr": 3 + "score": 0.39325950430752504, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.28840308930540803, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.019516573752972968, - "sentence_nr": 3 + "score": 0.3915762897108857, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.22419056820298167, - "sentence_nr": 3 + "score": 0.2494491272843294, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.3577306040313533, - "sentence_nr": 3 + "score": 0.3095153437956992, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.2434330428491034, - "sentence_nr": 3 + "score": 0.22808349744044165, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.31858900384957733, - "sentence_nr": 3 + "score": 0.5219559548268804, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.583526016818016, - "sentence_nr": 3 + "score": 0.25740737310782613, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.6994652193905146, - "sentence_nr": 3 + "score": 0.5296577800002638, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.27405612859390877, - "sentence_nr": 3 + "score": 0.17467275747732053, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.4639958592456083, - "sentence_nr": 3 + "score": 0.4425746537169907, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.4390960897971484, - "sentence_nr": 3 + "score": 0.39105282175830797, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.541742178821102, - "sentence_nr": 3 + "score": 0.5918209407860066, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.13232291594986312, - "sentence_nr": 3 + "score": 0.20798473392141248, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.301901669683193, - "sentence_nr": 3 + "score": 0.4882728259348431, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.15084825228964133, - "sentence_nr": 3 + "score": 0.38994162074103633, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3284886849880412, - "sentence_nr": 3 + "score": 0.5563550526031608, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.26949792178164744, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.4573532520066626, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.4452652851854937, - "sentence_nr": 3 + "score": 0.19154073663245894, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.5889782977654896, - "sentence_nr": 3 + "score": 0.36853748147123533, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.1561669836509596, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.35203414979696007, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 3 + "score": 0.3367828743677757, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3932141708916282, - "sentence_nr": 3 + "score": 0.5208619367149742, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.1536690667279411, - "sentence_nr": 3 + "score": 0.293957196620502, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.23373462830676886, - "sentence_nr": 3 + "score": 0.4618648817849918, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.16800102974369996, - "sentence_nr": 3 + "score": 0.5370274094269881, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3319781987745275, - "sentence_nr": 3 + "score": 0.7083576830089957, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.24007528246707907, - "sentence_nr": 3 + "score": 0.09559579550755593, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.31084467045503017, - "sentence_nr": 3 + "score": 0.22721030607923384, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.6052987576779449, - "sentence_nr": 3 + "score": 0.1620171923846691, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.643602170728296, - "sentence_nr": 3 + "score": 0.40132306100670667, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.13004800471424346, - "sentence_nr": 3 + "score": 0.10695860479627832, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.28217142159025543, - "sentence_nr": 3 + "score": 0.23093180806683553, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.3924259174695316, - "sentence_nr": 3 + "score": 0.2006816977738918, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.45050557152077386, - "sentence_nr": 3 + "score": 0.3778907443846581, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.10601317434781207, - "sentence_nr": 3 + "score": 0.2365826957720282, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.2344095627038401, - "sentence_nr": 3 + "score": 0.40015265544052253, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.12769027061800275, - "sentence_nr": 3 + "score": 0.19927817813049292, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.21844360831325868, - "sentence_nr": 3 + "score": 0.3799201142133713, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.22523697594538705, - "sentence_nr": 3 + "score": 0.30107111173332013, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.387672788880256, - "sentence_nr": 3 + "score": 0.49518939388085714, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.23705266435224473, - "sentence_nr": 3 + "score": 0.360791478420284, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.3838188339168412, - "sentence_nr": 3 + "score": 0.5455769471781564, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.262924194880141, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.42500595618199777, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.1852972751417938, - "sentence_nr": 3 + "score": 0.08431948785504675, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.36660412101424933, - "sentence_nr": 3 + "score": 0.3346001936389817, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.12018438120605004, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.41073371100714057, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.05815951947143174, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.2653698485201136, - "sentence_nr": 3 + "score": 0.25465334407321377, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.3674668904964848, - "sentence_nr": 3 + "score": 0.38141042305440287, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.40975628086142124, - "sentence_nr": 3 + "score": 0.6050551830651282, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.4815092081725061, - "sentence_nr": 3 + "score": 0.23972932680765302, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.5785251190053333, - "sentence_nr": 3 + "score": 0.45641440058943905, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.37821486365532614, - "sentence_nr": 3 + "score": 0.1753423380032761, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.4718665834023439, - "sentence_nr": 3 + "score": 0.3860867724140123, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.35423985843000033, - "sentence_nr": 3 + "score": 0.3727176326636912, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.4293667924436175, - "sentence_nr": 3 + "score": 0.5936665893096997, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.2933705789311311, - "sentence_nr": 3 + "score": 0.2883922922854595, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.35570110758127277, - "sentence_nr": 3 + "score": 0.5189711161059738, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.3931807596037881, - "sentence_nr": 3 + "score": 0.2904236353297477, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.41587358041151196, - "sentence_nr": 3 + "score": 0.5110246787073157, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.1109746392358308, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.3361655324870268, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.17679588126795498, - "sentence_nr": 3 + "score": 0.21126480857843466, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.4098986063548376, - "sentence_nr": 3 + "score": 0.5208171960282157, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.08514622725334697, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.32324062436989165, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.3675058901988579, - "sentence_nr": 3 + "score": 0.0923710591915157, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.445171638403697, - "sentence_nr": 3 + "score": 0.3515127205889234, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.3196352513221046, - "sentence_nr": 3 + "score": 0.27333560530341, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.29107087297820256, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.39517833279310743, - "sentence_nr": 3 + "score": 0.2700537967608886, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.3763743474188506, - "sentence_nr": 3 + "score": 0.05369620163577867, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.4120099199050514, - "sentence_nr": 3 + "score": 0.17786551143318782, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.38694317759010316, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.45827711860455167, - "sentence_nr": 3 + "score": 0.2714062063125971, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.3699382260470039, - "sentence_nr": 3 + "score": 0.06216795401336647, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.4032851361478274, - "sentence_nr": 3 + "score": 0.24520849281921142, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.42378190548671596, - "sentence_nr": 3 + "score": 0.20170324785987873, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.47335507275218824, - "sentence_nr": 3 + "score": 0.41777877902092536, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.3763278728427448, - "sentence_nr": 3 + "score": 0.14128339612076699, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.39009457811977266, - "sentence_nr": 3 + "score": 0.31208948933714575, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.34791594751284466, - "sentence_nr": 3 + "score": 0.14488582350016033, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.40864368085475805, - "sentence_nr": 3 + "score": 0.28091550902615625, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 + "score": 0.3738496420930624, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.5169677927619225, - "sentence_nr": 3 + "score": 0.6102099909532881, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.4056782022243561, - "sentence_nr": 3 + "score": 0.30510387810107376, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.5237454577692897, - "sentence_nr": 3 + "score": 0.5275904051812422, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.2512077334912375, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.42501440166045495, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.409211292187266, - "sentence_nr": 3 + "score": 0.20037263749423856, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.44289452305459603, - "sentence_nr": 3 + "score": 0.4116722318656521, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.12120981066263758, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.139781837804502, - "sentence_nr": 3 + "score": 0.28837260181822605, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.2134385691462796, - "sentence_nr": 3 + "score": 0.0973834065412635, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.296993231533869, - "sentence_nr": 3 + "score": 0.3349744739632232, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.23270804908165135, - "sentence_nr": 3 + "score": 0.3307802307098964, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.3478589640284733, - "sentence_nr": 3 + "score": 0.5254270621404842, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.4115167991342047, - "sentence_nr": 3 + "score": 0.27791708125521297, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.5649900101054287, - "sentence_nr": 3 + "score": 0.47469089541729687, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 + "score": 0.273867164036603, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.5169677927619225, - "sentence_nr": 3 + "score": 0.4374075951791586, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.2706805630983137, - "sentence_nr": 3 + "score": 0.2976799101963322, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.38186806613291924, - "sentence_nr": 3 + "score": 0.444010953670269, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.1890425467840326, - "sentence_nr": 3 + "score": 0.29096032128390625, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.3513019690066663, - "sentence_nr": 3 + "score": 0.4378899812097463, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.11234905986715489, - "sentence_nr": 3 + "score": 0.25430748318960106, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.15225251521949978, - "sentence_nr": 3 + "score": 0.39529487649625655, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 + "score": 0.1518385401250995, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.5135242648129007, - "sentence_nr": 3 + "score": 0.33666656185298044, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.13733894353973466, - "sentence_nr": 3 + "score": 0.39332576103648914, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.26356016327430454, - "sentence_nr": 3 + "score": 0.5905291357670327, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.22403997563360165, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.34609083438127625, - "sentence_nr": 3 + "score": 0.4163466494058026, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.15878174295086994, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.3163237075880393, - "sentence_nr": 3 + "score": 0.17289005672834318, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.014379215591354156, - "sentence_nr": 3 + "score": 0.3272275987435471, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.12943648490176665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.29942831535046555, - "sentence_nr": 3 + "score": 0.2722541020028712, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.22453002699007485, - "sentence_nr": 3 + "score": 0.1578223852154551, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.3354597455808525, - "sentence_nr": 3 + "score": 0.2505825632099063, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.24489516889906388, - "sentence_nr": 3 + "score": 0.18062196015850054, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.409369762090413, - "sentence_nr": 3 + "score": 0.28314745233130384, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.15698648794278228, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.3780009826926042, - "sentence_nr": 3 + "score": 0.2621553805617084, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.26696378876165927, - "sentence_nr": 3 + "score": 0.2644420173449953, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.3581548569027847, - "sentence_nr": 3 + "score": 0.4942477585038362, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.1262744724314408, - "sentence_nr": 3 + "score": 0.18599084133498495, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.25266688865379994, - "sentence_nr": 3 + "score": 0.5118786242195569, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.12061450720815534, - "sentence_nr": 3 + "score": 0.13597984690621734, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.28227862122593256, - "sentence_nr": 3 + "score": 0.3458330288507223, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.20402030918588085, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.456698795546807, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.26518122980477765, - "sentence_nr": 3 + "score": 0.2309839071027402, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.514846875413034, - "sentence_nr": 3 + "score": 0.49038835977529066, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.08868436194048783, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.05176181222975041, - "sentence_nr": 3 + "score": 0.3831080989149587, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.2082376263771737, - "sentence_nr": 3 + "score": 0.45698403959641504, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.3665582908776792, - "sentence_nr": 3 + "score": 0.6213112392290069, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.38305744058127894, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.010917030567685585, - "sentence_nr": 3 + "score": 0.5478318338650964, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.24258147673307567, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.22316698150381944, - "sentence_nr": 3 + "score": 0.4209309051959654, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5323299634491547, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.18341524527739528, - "sentence_nr": 3 + "score": 0.6662250066806382, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.21820009804660956, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.3989952325675248, - "sentence_nr": 3 + "score": 0.4781988345309841, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.3925121365052661, - "sentence_nr": 3 + "score": 0.43221578809726585, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.47788592802001717, - "sentence_nr": 3 + "score": 0.5731291999928431, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.35423985843000033, - "sentence_nr": 3 + "score": 0.32971194511331664, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.4401068255722377, - "sentence_nr": 3 + "score": 0.5743696901533981, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3699456463838542, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.19263684669277223, - "sentence_nr": 3 + "score": 0.5802398187253943, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.3020103087706165, - "sentence_nr": 3 + "score": 0.17822917800610066, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.36625663694634303, - "sentence_nr": 3 + "score": 0.4151867931118682, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.4772894233335957, - "sentence_nr": 3 + "score": 0.21565717351206007, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.6272300080155604, - "sentence_nr": 3 + "score": 0.43093412084257204, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3418125302637748, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.2523841428380167, - "sentence_nr": 3 + "score": 0.49442592936157403, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.24463616303656896, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.47836315300803617, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.11146727460890443, - "sentence_nr": 3 + "score": 0.17956658431980232, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.23969027175152666, - "sentence_nr": 3 + "score": 0.40953674896318, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.15222275337751698, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.1123102665458642, - "sentence_nr": 3 + "score": 0.3357964670722481, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 3 + "score": 0.09176232845253508, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.32305168353427943, - "sentence_nr": 3 + "score": 0.3662849506582589, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.12789533377801793, - "sentence_nr": 3 + "score": 0.12443062751342439, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.2283763803651714, - "sentence_nr": 3 + "score": 0.23475988618545057, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.15896519992112562, - "sentence_nr": 3 + "score": 0.10074078347174975, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.29513999460654694, - "sentence_nr": 3 + "score": 0.4196634384767732, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 3 + "score": 0.2421027255090914, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.2596718628394258, - "sentence_nr": 3 + "score": 0.43034076639606794, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.30998874327213094, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.17615962296513688, - "sentence_nr": 3 + "score": 0.5576631273432328, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.11895105908708084, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.24431474547499252, - "sentence_nr": 3 + "score": 0.36047024705778663, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.13502367316243039, - "sentence_nr": 3 + "score": 0.14936222269982402, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.2597905925100196, - "sentence_nr": 3 + "score": 0.31574081509853114, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.28254292734082, - "sentence_nr": 3 + "score": 0.19532472677364568, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.408604647745239, - "sentence_nr": 3 + "score": 0.28229489197306035, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.16279348731624776, - "sentence_nr": 3 + "score": 0.1871593662064507, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.3149639290246331, - "sentence_nr": 3 + "score": 0.2976064158089205, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.17743299460161885, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2727203075505105, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.28200049157537727, - "sentence_nr": 3 + "score": 0.18732171292752747, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.40682121687319617, - "sentence_nr": 3 + "score": 0.3771250747486541, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.10277283552544851, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.20069190971412876, - "sentence_nr": 3 + "score": 0.30275778273888243, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.12409597120849801, - "sentence_nr": 3 + "score": 0.2311945565876858, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.19449255248446348, - "sentence_nr": 3 + "score": 0.4191879346159698, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.3571150500823898, - "sentence_nr": 3 + "score": 0.22242657086332168, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.44642876819396304, - "sentence_nr": 3 + "score": 0.42530875747218794, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.3571150500823898, - "sentence_nr": 3 + "score": 0.16725758583047057, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.5018967494794737, - "sentence_nr": 3 + "score": 0.4301201864661688, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.3572188192648703, - "sentence_nr": 3 + "score": 0.2249207301180078, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.45381175288762937, - "sentence_nr": 3 + "score": 0.41620574629813584, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.3572188192648703, - "sentence_nr": 3 + "score": 0.08931590096116851, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.42937064888927773, - "sentence_nr": 3 + "score": 0.3195248805327168, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.2513073726775429, - "sentence_nr": 3 + "score": 0.16460358737053557, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.3798674638470122, - "sentence_nr": 3 + "score": 0.3762211520402395, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.27274191069381915, - "sentence_nr": 3 + "score": 0.080483139730987, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.39756349135906077, - "sentence_nr": 3 + "score": 0.2603682099909762, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.27289712011595696, - "sentence_nr": 3 + "score": 0.008644306419269237, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.3712305452784761, - "sentence_nr": 3 + "score": 0.035638819901027775, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.13232291594986312, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.3103406767609728, - "sentence_nr": 3 + "score": 0.14213391357743071, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.08673245635389941, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.25266660249677875, - "sentence_nr": 3 + "score": 0.09835845272625326, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.2295748846661433, - "sentence_nr": 3 + "score": 0.152904843780367, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.3277581848986239, - "sentence_nr": 3 + "score": 0.31008937332181585, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.11902001907030836, - "sentence_nr": 3 + "score": 0.29474400772477727, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.2714975491916872, - "sentence_nr": 3 + "score": 0.44093996056306717, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.19040700845445938, - "sentence_nr": 3 + "score": 0.24768872234171613, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.354176495487078, - "sentence_nr": 3 + "score": 0.375309424077234, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.08968235248346597, - "sentence_nr": 3 + "score": 0.1377011720764204, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.2175311081388801, - "sentence_nr": 3 + "score": 0.27757197306209697, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.10682827247639556, - "sentence_nr": 3 + "score": 0.16775100491410364, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.21551117313912851, - "sentence_nr": 3 + "score": 0.30868449832998995, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.07425055521504613, - "sentence_nr": 3 + "score": 0.13258797836353825, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.18122341046764998, - "sentence_nr": 3 + "score": 0.28552565766604676, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.16352670859125373, + "score": 0.8107492451395732, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.26406333983187025, + "score": 0.900032747778274, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.8761560783209453, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.14974959199825547, + "score": 0.9484564543183253, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.10640850690356463, + "score": 0.7505336182671021, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.152668380659781, + "score": 0.8401910628269498, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.1022875701616399, + "score": 0.13725861056573663, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.21585478443422898, + "score": 0.11147384852362276, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.09812163258584553, + "score": 0.13453927150397377, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.23909785111906673, + "score": 0.10522974272748564, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.22055493694673897, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.18802425548665458, + "score": 0.3931965048763613, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.10401577613691954, + "score": 0.839587623092576, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.16184347717072042, + "score": 0.9096086668952811, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.6885326214539055, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.006827911047017742, + "score": 0.8229812189228393, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.839587623092576, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.2164910348876327, + "score": 0.9096086668952811, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.1472462377094902, + "score": 0.412295470431275, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.30525310195831357, + "score": 0.705800771033924, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.2774527633525211, + "score": 0.3480442076026084, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.4358323759361012, + "score": 0.6142483232997242, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.1978585723043446, + "score": 0.2861853478258715, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.3527599187160617, + "score": 0.6401604432917332, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.3021375397356768, + "score": 0.14172292406325543, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.460354013765958, + "score": 0.4762857001428092, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.12991916506579942, + "score": 0.17401517708317762, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.27889392608860697, + "score": 0.45006261596496794, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.1959280139287724, + "score": 0.151240443751577, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.36708131749832384, + "score": 0.4224869587588239, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.15663915411954296, + "score": 0.21850594525107195, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.30522234950263477, + "score": 0.4049269026117245, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.3166144686275811, + "score": 0.2028736642487601, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.39315143982598805, + "score": 0.3614856639698008, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.2255489037266197, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.3954925749722234, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.3761648431086742, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.5466736282576272, + "score": 0.2466674257522263, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.2929807168354841, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.5975595069845072, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.6689604664235209, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.26007925058007886, + "score": 0.8010329764520807, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.31314224813827346, + "score": 0.4678134833959513, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3932583887521134, + "score": 0.5051480556620123, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.5199302229930708, + "score": 0.4113125177363443, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.6017481019884499, + "score": 0.42808075762838727, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.2523019529343173, + "score": 0.32685141385924577, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.4406369072888057, + "score": 0.3758692873615971, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.4195959935514934, + "score": 0.27341185048222727, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5643628666004862, + "score": 0.6411651849711889, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.35974578964005544, + "score": 0.6237774736059616, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5294218015563622, + "score": 0.8500131524897436, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.3615855225145535, + "score": 0.573764722928549, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.4838257582776513, + "score": 0.817979859532479, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.4855332614117322, + "score": 1.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.5860317733541353, + "score": 1.0, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.4671778989333441, + "score": 0.8482942955247808, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.5801543735794272, + "score": 0.9256238040654331, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 1.0, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 1.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.4501609222100726, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5779114321769039, + "score": 0.27483211854002193, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.20164065196183215, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.06170484898727104, + "score": 0.3923533979663226, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.27199778234956107, + "score": 0.4212555584968603, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.4129085501138227, + "score": 0.44264089366400194, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.24728515687112834, + "score": 0.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.31221693968406194, + "score": 0.25034600728678114, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.19230188007838597, + "score": 0.3471790743028735, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.3407021378942239, + "score": 0.5354826964964929, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.41072675483179805, + "score": 0.14528679532351443, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5635589150380774, + "score": 0.45626264176882697, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.4122974402951816, + "score": 0.19496249079519765, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.49812945858788304, + "score": 0.3404780052299898, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.09812163258584553, + "score": 0.47631009147745074, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.2826014149329834, + "score": 0.6668706097750393, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.16954555291875742, + "score": 0.3312076918041707, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.31800856700824265, + "score": 0.5907489769215882, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, + "score": 0.1481394578697113, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, + "score": 0.4346232049071254, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.41489895705463226, + "score": 0.20217803037339238, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5351666707169298, + "score": 0.4743589083194767, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.8492326635760689, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.9048530940348648, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.2505523539251516, + "score": 0.20828838183973028, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.3831975160557709, + "score": 0.3002418280717453, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.14710052131359536, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.03616809285846403, + "score": 0.2322531742374544, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.19004145843928574, + "score": 0.19427446513842178, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.3699750032227405, + "score": 0.2821379316874468, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.12045422179467957, + "score": 0.4962806188414642, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.22184013352319704, + "score": 0.6142754339875535, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, + "score": 0.8862476419965991, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.6612342258381259, + "score": 0.9357668560693397, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.3883375900135818, + "score": 0.5181212746323438, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.4643731845106876, + "score": 0.6637469240656353, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.11556647985416685, + "score": 0.4499781507152021, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.23748323325414847, + "score": 0.5255804479733618, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.2589451141492935, + "score": 0.34190945667692124, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.36265302427150575, + "score": 0.38471892347518344, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.21223633441554032, + "score": 0.40122658664791466, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.31520576641768766, + "score": 0.5789876781041586, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.45167594566243024, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.4937637674644026, + "score": 0.28743011594299434, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.1532685994792829, + "score": 0.2803950119994004, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.2563896884841204, + "score": 0.45110582913319186, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.2818720423903184, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.3950106268119009, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.17793925745339384, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.25957530464210776, + "score": 0.28012888208050135, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.40533142214730206, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.6060585353811423, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.326434800689293, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.18223058800557917, + "score": 0.4480048055728212, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.33062429129755794, + "score": 0.3861304705880985, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.4887128900317842, + "score": 0.5248223977765574, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.6064630666233242, + "score": 0.36210097004176117, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.6712747226800536, + "score": 0.5007651303154664, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, + "score": 0.3875738939089708, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.7123666275414222, + "score": 0.6165609648382772, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.6560788161152474, + "score": 0.10593991511157044, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.25841450487427714, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.44494784218322847, + "score": 0.12522096668112345, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.18279744869146425, + "score": 0.0, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.4024804882630303, + "score": 0.1209593074330819, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.4245970617334277, + "score": 0.2476165058078653, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.5694561181202089, + "score": 0.5008770061123741, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.3360020594873999, + "score": 0.3187271473320672, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.4979723869498355, + "score": 0.5308594426934777, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.20158074753947833, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.4601245932152929, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.2676032275663791, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.40689458580141896, + "score": 0.08214465123593925, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.22319449652482443, + "score": 0.031835895924415845, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.233078149078302, + "score": 0.0, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.41167690646865734, + "score": 0.030384560195149946, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.13536681105774234, + "score": 0.20687913047075865, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.23595972523293418, + "score": 0.22828954232316065, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.28479942163807365, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.14728954312449322, + "score": 0.3268532842861261, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.2256490809237466, + "score": 0.16442902274779153, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.37300331821940047, + "score": 0.2560785881872606, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.24883108274644028, + "score": 0.1420515519076652, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.3329995141432608, + "score": 0.44106281582354434, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.13827175716697776, + "score": 0.13714971174966237, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.25880112791050663, + "score": 0.40286739446153647, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.11856660123276004, + "score": 0.1420515519076652, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.21303170584506914, + "score": 0.42828528785491377, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.24017995835117018, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.3874922193559855, + "score": 0.24187339355165743, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.26505211456170086, + "score": 0.24258882871985424, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.3924854246504529, + "score": 0.5303410271196803, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.2524885334037127, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.478828996001936, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.1601125708485386, + "score": 0.3276463794734248, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.23752778281494039, + "score": 0.5691582878087721, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.033908919446183204, + "score": 0.08644426594013177, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.3191159905793865, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.15843589640881697, + "score": 0.5859514292713682, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.34791594751284466, + "score": 0.3401185071799048, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.4062384532979022, + "score": 0.46989148838686473, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.7795149903947967, + "score": 0.22911003908657962, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.8912732146280626, + "score": 0.47520559779668814, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.6230832293767097, + "score": 0.3842217688744563, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.6994652193905146, + "score": 0.5778946517730051, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.36763082847636347, + "score": 0.2115210387013347, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.4293667924436175, + "score": 0.37360356038205855, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.43728553857900826, + "score": 0.10752875081378005, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.43806460289276367, + "score": 0.3325883825268305, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.41546060026113085, + "score": 0.0, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.4663335536810786, + "score": 0.20583079363601006, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.6230832293767097, + "score": 0.4354676264744325, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.6994652193905146, + "score": 0.6617572899591347, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.4578226095312774, + "score": 0.687028746592056, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.5429809487027987, + "score": 0.7656691936879507, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.5948821108337683, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.7754376703404752, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.4390960897971484, + "score": 0.47992366044240764, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.5164819146651056, + "score": 0.5677112072864312, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.1600733593956426, + "score": 0.40283046047758825, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.2350173429055301, + "score": 0.6078521036010628, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.27182269429130823, + "score": 0.583899712825618, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.3135936384682831, + "score": 0.6318158012380937, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.33737554588923646, + "score": 0.3267294026204632, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.369875665962309, + "score": 0.4333271065990519, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.30389058699653954, + "score": 0.3875738939089708, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4310896909809194, + "score": 0.4061202407032717, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.3763278728427448, + "score": 0.4478039777693801, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4261526683335186, + "score": 0.4520078853554537, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.36210097004176117, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.4423339372603474, + "score": 0.2284240389125248, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.3763278728427448, + "score": 0.14669686413046806, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.4047854120649662, + "score": 0.4859054130771095, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.3780488661667278, + "score": 0.2034208182228192, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.38749538363085073, + "score": 0.574929092637314, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.4246183605185108, + "score": 0.18818468056723417, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.501948957312799, + "score": 0.35937272275747895, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.4246183605185108, + "score": 0.19394698940390126, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.501948957312799, + "score": 0.26825696990387976, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.18661491233517533, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.2813929671493715, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.32001589569502475, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.41050505670086324, + "score": 0.3444628323551915, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.21163700429456012, + "score": 0.09784168213672303, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.2754475815887392, + "score": 0.29894945474335277, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.25437706194445847, + "score": 0.0, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.28858721839661267, + "score": 0.1333681278040752, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.3924259174695316, + "score": 0.18759202316167212, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.4246539836622663, + "score": 0.29125622859770967, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.39022736644855677, + "score": 0.360056585428503, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.5202587069271436, + "score": 0.6205563386317268, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.3672404084841361, + "score": 0.2965712396518154, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.4518744271362045, + "score": 0.4982942628716768, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.34332704063408953, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.408639131209588, + "score": 0.31282656848317575, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.3672404084841361, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.43288682804187184, + "score": 0.2635537115709103, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.3120848453730729, + "score": 0.09562406574442013, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.3705328519354151, + "score": 0.3567495948423568, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.4185938787651429, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.49815219590152665, + "score": 0.10863331427699639, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.18059154473936082, + "score": 0.1545264805839251, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.3333808784117231, + "score": 0.1540863895003299, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.12991916506579942, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.15172157858081844, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.27334594211112967, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.35690726265980793, + "score": 0.16857118556223663, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.31277600813200596, + "score": 0.0, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.3423771857129358, + "score": 0.16664773157279084, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.25944320225692963, + "score": 0.0, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.3049825437064534, + "score": 0.20610814612963693, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.21027545940631823, + "score": 0.39877487248588855, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4572185175571455, + "score": 0.5310673226738754, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.25383339228798274, + "score": 0.35530535255335743, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.46123461430035645, + "score": 0.49317248045883316, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.16401798649868696, + "score": 0.3587511367366511, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.3760928911869727, + "score": 0.4883760305954916, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.31756348590173983, + "score": 0.12673718536830808, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.612638372401986, + "score": 0.3654897220064675, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.1535259783865636, + "score": 0.2061477352156375, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.35449697447586703, + "score": 0.47004751394317756, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.14113991930789777, + "score": 0.14939354788683526, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.33851813032056655, + "score": 0.32530330530848595, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.288452820117579, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.48305135114165254, + "score": 0.3157547101764287, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.25383339228798274, + "score": 0.276162846755385, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.46285845798510755, + "score": 0.590708674573307, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.24111227980147207, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.4551171989910026, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.307137308263447, + "score": 0.22537412722674857, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.6108807661013372, + "score": 0.5140931896637475, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.104552581027927, + "score": 0.27668736912821895, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.22766480821275292, + "score": 0.40498166849882933, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.24623953025272619, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.2140405179077785, + "score": 0.4432077157216156, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.11414633188690328, + "score": 0.5626933387118171, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.2503197875391322, + "score": 0.7324678592100804, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.24622924391353238, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.17807129401511626, + "score": 0.5061411895546121, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.07843772989359644, + "score": 0.33713540983351536, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.2669076052967215, + "score": 0.575216587193883, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.12913533075470382, + "score": 0.18512357977360452, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.25219595014343127, + "score": 0.3375501419021754, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.108043996762779, + "score": 0.276824319389769, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.24726477214045167, + "score": 0.41668286331996945, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.07418182487612639, + "score": 0.13597796343834895, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.26363958328353637, + "score": 0.3135708685608336, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.24883108274644028, + "score": 0.25590356077469273, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.2911305725820655, + "score": 0.3573797368487792, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.1022763758993479, + "score": 0.21800193956058223, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.2461976716440084, + "score": 0.38946101797520266, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.1136025775201101, + "score": 0.2046592065585361, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.2506114986319833, + "score": 0.36230063786538963, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.11092770141728163, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.24752626772001793, + "score": 0.09520142487881139, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.1596217869107113, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.1179949261549654, + "score": 0.34479856217047133, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.108043996762779, + "score": 0.1542516097843049, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.2560670416470495, + "score": 0.3214558584292576, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.18105048502088059, + "score": 0.20891853890830714, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.271054087912132, + "score": 0.4493410190472382, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.3029928206533524, + "score": 0.3892904392414698, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.45262153733641225, + "score": 0.7105799594213658, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.43000007605628365, + "score": 0.18838122823966647, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.5073076708050636, + "score": 0.3774817267101267, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.4125656013737127, + "score": 0.27571859863660825, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4621663225660634, + "score": 0.6161549134554137, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.3512502252598613, + "score": 0.6147881529512643, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.43869157736529, + "score": 0.8512585023349323, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.10991514729498916, + "score": 0.3451395513935865, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.2549321593272589, + "score": 0.5966234405388378, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.409211292187266, + "score": 0.4874671560842625, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.5745300552045192, + "score": 0.5556100337221122, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.3471636178393148, + "score": 0.4125656013737127, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.4191180487309204, + "score": 0.47300889567828913, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.4874671560842625, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.5556100337221122, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.17493830569974406, + "score": 0.17059573701616804, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.26690261630673184, + "score": 0.3047807762878146, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.20600299538363603, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.13110052718009899, + "score": 0.3255301396673795, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.25678404806291744, + "score": 0.0, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.35179752925748303, + "score": 0.2599500366651973, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.16767849550785174, + "score": 0.670001214025099, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.32211607665330505, + "score": 0.8165101710241945, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.5386933265263314, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.3068038152276031, + "score": 0.7161301762977397, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.17278760032513005, + "score": 0.7743810851655712, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.3872014058095359, + "score": 0.8501530116907571, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.15663915411954296, + "score": 0.16698954254101847, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.3014802378681773, + "score": 0.4261570418098611, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.11117895489854909, + "score": 0.05213452347557882, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.2820967744384662, + "score": 0.26774712462225825, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.16363363237367146, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.2305923243495954, + "score": 0.4703098343850332, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.3552281813814547, + "score": 0.40784428374394943, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.4760811876074703, + "score": 0.5381101081489561, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.34371432547871084, + "score": 0.6425503166524515, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.5618168006545909, + "score": 0.7487396970714064, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.5538142279874649, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.7100363957633058, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.1636336323736715, + "score": 0.21552806748139675, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.31453116716344526, + "score": 0.42322383185606965, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.11908088655595703, + "score": 0.17166186698685532, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.1504843536148922, + "score": 0.0, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.2872819820921106, + "score": 0.18367413170243496, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.1032637856654291, + "score": 0.2675267612290173, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.3875448571474537, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.12256115024243901, + "score": 0.6080656767908881, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.1667153530685962, + "score": 0.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.2563322947493884, + "score": 0.26483859169215596, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.18882437844970767, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.2977239406428574, + "score": 0.24826026845603086, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.11676324068066542, + "score": 0.1676784955078518, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.17463781885740615, + "score": 0.4080601696403001, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.2186976686325118, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.14516097783347692, + "score": 0.39566479324421505, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.11548431380162248, + "score": 0.7267072830982378, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.20558096287614028, + "score": 0.7866205781286685, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.1022875701616399, + "score": 0.5053915409642312, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.20801464611670326, + "score": 0.5997255667760161, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.565361573648609, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.7078194218800181, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.08036517486170719, + "score": 0.8078150978406724, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.13972284796504708, + "score": 0.9277882372664237, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.4122868806605698, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.002670940170940171, + "score": 0.6594447026416659, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.9086549610666288, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.07993498817966901, + "score": 0.9619796185510897, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.17694915875422723, + "score": 0.1854768778301409, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.20162477784805663, + "score": 0.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.4096636937616398, + "score": 0.32434904462209013, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.15756751766261828, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.18653722013077995, + "score": 0.3930524987681875, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.1075467277016126, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.19568663135308928, + "score": 0.10289041531199654, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.20001812735957777, + "score": 0.13735392786968814, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.10356012289702854, + "score": 0.1529727623288305, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.31649183953796767, + "score": 0.3589485409071449, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.4627135773217405, + "score": 0.5118532683220741, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.1504843536148922, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.2387391897386676, + "score": 0.40413967944464674, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.3073381334558822, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.47175704360401727, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.14127146367040996, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.21761435882075786, + "score": 0.19918231170682932, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.13196931585453256, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.25888526562309433, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.13897158881271357, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.1371326695219561, + "score": 0.2586128831320178, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.21582567264281033, + "score": 0.22755587699269406, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.15923351303897038, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.16856369030953883, + "score": 0.48920516349347787, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.09857961422425707, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.17222588358752802, + "score": 0.3596883714131735, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.30166728533047465, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.2079623070209971, + "score": 0.49325034878601914, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.5041350242010638, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.17459825687868188, + "score": 0.593218021252677, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.38403730312776824, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.13556459893693668, + "score": 0.4313278522148071, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.2298838546025776, + "score": 0.1867326928627706, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.4405136963304348, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.17271632813084475, + "score": 0.6684728376823417, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.8843946454355334, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.9484564543183253, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.15002157377943118, + "score": 0.25845327815429825, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.47631009147745074, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.20909575217027426, + "score": 0.6947042435623316, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.47631009147745074, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.1710093902521564, + "score": 0.6947042435623316, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.17620697694486812, + "score": 0.2082198320914845, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.3349127944613063, + "score": 0.4623467870246507, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.49806572776935465, + "score": 0.30384451027588233, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.643432991222625, + "score": 0.6512504803383198, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.40515809323728763, + "score": 0.2080315522738391, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.5449200667389397, + "score": 0.5621055754076354, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.47992366044240764, + "score": 0.11002068284361832, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.6339648696206408, + "score": 0.3834796809260055, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.33106709338898954, + "score": 0.5131275135405763, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.47018261317709287, + "score": 0.6459274917556753, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.11758806799906993, + "score": 0.2940242361968644, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.2301535009751364, + "score": 0.42080477541613315, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.3757154576765614, + "score": 0.2834458481265108, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.5486864756832853, + "score": 0.513372427519788, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.3757154576765614, + "score": 0.2653242203314122, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.5486864756832853, + "score": 0.3884124015564349, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.2653242203314122, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.34894923767842045, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.20313943660946668, + "score": 0.11434800023292974, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.37106764025714706, + "score": 0.2765427274562731, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.19761584919202016, + "score": 0.25573580684957503, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.17042178650486198, + "score": 0.0, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.3381936694419076, + "score": 0.20221836056343176, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.15285713412458543, + "score": 0.35296675898522734, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.24746988080227628, + "score": 0.6528129880382749, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.17840382725791726, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.22777548520508317, + "score": 0.48242888261061057, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.10845182904139573, + "score": 0.16513339589011042, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.27825798566245524, + "score": 0.31848886796533626, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.21143896166926268, + "score": 0.3560907191428757, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.09264863757089695, + "score": 0.130145254194367, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.28521768773900896, + "score": 0.3939630948438474, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.09654156210312353, + "score": 0.220561161454765, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.28937694627449495, + "score": 0.4391853573161077, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.15858053013682752, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.2610745423686279, + "score": 0.3498719376380512, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.14153945350617025, + "score": 0.0675147309173167, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.2844284823024208, + "score": 0.33388033606118805, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.11123005554481152, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.34466307593766776, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.13547277341758465, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.2717018577858807, + "score": 0.24323957127167917, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.15542859771226727, + "score": 0.0, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.24568580024277256, + "score": 0.5437359425177195, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.14132052098159442, + "score": 0.13737279171076758, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.28750248252681143, + "score": 0.500804220867966, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.21004850229269248, + "score": 0.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.3447734406041285, + "score": 0.14718371279247996, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.22743363869750483, + "score": 0.2592494576098376, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.4248437050568334, + "score": 0.5612265609510381, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.29579846078012384, + "score": 0.310441435588881, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.5170279422640637, + "score": 0.47844112187242127, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.33018775735516415, + "score": 0.21223633441554032, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.4302626210819361, + "score": 0.2461201947986108, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.17034799410713272, + "score": 0.33094680953828387, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.33202264871471165, + "score": 0.26333181655541393, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.10389889377420038, + "score": 0.30094298890378757, "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.251017701718585, + "score": 0.2718655706895806, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.4001601601922499, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.5775093624441812, + "score": 0.2152312523781826, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.34426631072695274, + "score": 0.0, "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.460136778351592, + "score": 0.24225277690337235, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0, + "score": 0.21015671316214898, "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.0, + "score": 0.35725298833268504, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.24727997687684689, + "score": 0.0, "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.45638354728732916, + "score": 0.24331740192945808, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.0, + "score": 0.3669898885019683, "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.16500804259372737, + "score": 0.4853134986602024, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.1411791520898124, + "score": 0.16133948681475338, "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.2982093797567791, + "score": 0.3064239315441572, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.2512712267295304, - "sentence_nr": 3 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.2928484110896528, - "sentence_nr": 3 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.20723903671796345, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.2746682387996949, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.3626305461419687, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.36369161190356464, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.20015392096614926, - "sentence_nr": 3 + "score": 0.7215691881328408, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.31608746169943613, - "sentence_nr": 3 + "score": 0.735100789804592, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.1333497993210919, - "sentence_nr": 3 + "score": 0.4765874091118851, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.2934780949245599, - "sentence_nr": 3 + "score": 0.45911557772276623, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.36332807605324846, - "sentence_nr": 3 + "score": 0.45022125383821326, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.36862616956526695, - "sentence_nr": 3 + "score": 0.46874267375238576, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.2512712267295304, - "sentence_nr": 3 + "score": 0.2028736642487601, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.2905232453852545, - "sentence_nr": 3 + "score": 0.43458947791319813, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.2556428001399479, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.3140105990938112, - "sentence_nr": 3 + "score": 0.3013901676230198, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.2590924722191636, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.3107725858839102, - "sentence_nr": 3 + "score": 0.38106012955734714, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.16634906495439625, - "sentence_nr": 3 + "score": 0.3820562306791339, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.24471852734537655, - "sentence_nr": 3 + "score": 0.5488313413379253, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3820562306791339, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.05742930892025214, - "sentence_nr": 3 + "score": 0.5488313413379253, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.26702493151987683, - "sentence_nr": 3 + "score": 0.38091370416670794, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.32737126018440355, - "sentence_nr": 3 + "score": 0.540550443602966, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.2539593860148789, - "sentence_nr": 3 + "score": 0.23189835231884592, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.31852925024650675, - "sentence_nr": 3 + "score": 0.44157797833899437, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.22650664914012167, - "sentence_nr": 3 + "score": 0.2294068720558097, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.44021101913103755, - "sentence_nr": 3 + "score": 0.40458364050078693, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.3039519244807058, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5267184242152971, - "sentence_nr": 3 + "score": 0.32588643749980295, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.42843223509772244, - "sentence_nr": 3 + "score": 0.31430120091187586, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.6251003871955769, - "sentence_nr": 3 + "score": 0.5013155459452984, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.10511846841633776, - "sentence_nr": 3 + "score": 0.45237912327122276, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.3196991085171128, - "sentence_nr": 3 + "score": 0.6299071573751139, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.21472986052554088, - "sentence_nr": 3 + "score": 0.23736810439041953, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.4245082395812861, - "sentence_nr": 3 + "score": 0.4004852416401387, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.3493450883427864, - "sentence_nr": 3 + "score": 0.3722001929300059, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.4989784300945961, - "sentence_nr": 3 + "score": 0.5252698638532942, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.34448756871464103, - "sentence_nr": 3 + "score": 0.5605065818946205, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.5072773512435553, - "sentence_nr": 3 + "score": 0.586853267829013, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.36300296341860155, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5816676674074003, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.2563499792328283, - "sentence_nr": 3 + "score": 0.1258907882951215, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.36968859129850784, - "sentence_nr": 3 + "score": 0.34143648068854054, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.28765408533715414, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.15071604670320132, - "sentence_nr": 3 + "score": 0.4046608868073569, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.17278760032513005, - "sentence_nr": 3 + "score": 0.1819722649161304, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3286210367348145, - "sentence_nr": 3 + "score": 0.44254730215235283, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.28295596283263513, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.1416851070122953, - "sentence_nr": 3 + "score": 0.6067794553589253, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.13305199541830684, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.23506726943391335, - "sentence_nr": 3 + "score": 0.43244987270004115, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3377385620641691, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.16170884319006984, - "sentence_nr": 3 + "score": 0.5256128450453542, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1175771442804648, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.21768601999695544, - "sentence_nr": 3 + "score": 0.3103572690939351, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.12546912767038895, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.11414803586430372, - "sentence_nr": 3 + "score": 0.2651343523961406, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.12407216162020399, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.1340366040130036, - "sentence_nr": 3 + "score": 0.2664864612493293, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5322520826224556, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.1768181514874441, - "sentence_nr": 3 + "score": 0.727131667480615, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5322520826224556, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.1610171131700526, - "sentence_nr": 3 + "score": 0.727131667480615, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5266403878479265, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.10844052180809807, - "sentence_nr": 3 + "score": 0.6838125749299477, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6144246566045058, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.17299030784008884, - "sentence_nr": 3 + "score": 0.7550732449201221, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2092659579124333, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.005676804759572728, - "sentence_nr": 3 + "score": 0.5354766759595367, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1284866896836278, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.16738626209234725, - "sentence_nr": 3 + "score": 0.54959682211865, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.2250265947708922, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.29110362374826554, - "sentence_nr": 3 + "score": 0.5470973834864862, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.30776997671170997, - "sentence_nr": 3 + "score": 0.5703017172567459, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5302815626138546, - "sentence_nr": 3 + "score": 0.78509136371851, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.355402294764931, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.4505070984023486, - "sentence_nr": 3 + "score": 0.5226605904538532, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 + "score": 0.289331164128846, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.30286447545869927, - "sentence_nr": 3 + "score": 0.5798018459101258, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.23819486101149287, - "sentence_nr": 3 + "score": 0.32001589569502475, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.2942276102068942, - "sentence_nr": 3 + "score": 0.5274343388526991, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.21738402658823416, - "sentence_nr": 3 + "score": 0.31869191523653845, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.34769229172545757, - "sentence_nr": 3 + "score": 0.5973293882694002, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.2910873658777246, - "sentence_nr": 3 + "score": 0.20038908500140973, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.4244075490204617, - "sentence_nr": 3 + "score": 0.38365854681342043, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.26788770505940324, - "sentence_nr": 3 + "score": 0.6666467303030572, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.40022938036917965, - "sentence_nr": 3 + "score": 0.6471929785766445, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.23287896954139942, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.34835288582718865, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.24669426816409512, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.37864393709540933, - "sentence_nr": 3 + "score": 0.4126375622076229, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.017203321134774727, - "sentence_nr": 3 + "score": 0.3664818394063563, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1404678994830558, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.24138580957738648, - "sentence_nr": 3 + "score": 0.470059848862195, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.1411791520898124, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.2902470444985328, - "sentence_nr": 3 + "score": 0.37429964421261713, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.2171788734284664, - "sentence_nr": 3 + "score": 0.1475256903804988, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.33899728453126426, - "sentence_nr": 3 + "score": 0.412830471303484, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.1050176352370787, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.3096045806359874, - "sentence_nr": 3 + "score": 0.3097707930450913, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.14635786490292246, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.2972359344684186, - "sentence_nr": 3 + "score": 0.38964624025381533, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.09881511482966875, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.2992587662174414, - "sentence_nr": 3 + "score": 0.35722668031235094, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.21997327371895098, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.34239865333501235, - "sentence_nr": 3 + "score": 0.359712726799806, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.14458350226851174, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.291471427554394, - "sentence_nr": 3 + "score": 0.499435958665741, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.25381494737245897, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.2187139093978746, - "sentence_nr": 3 + "score": 0.6023865926498774, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.20828838183973028, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.21375267056360892, - "sentence_nr": 3 + "score": 0.3919127720212458, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.14458350226851174, - "sentence_nr": 3 + "score": 0.3191922067457696, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.291471427554394, - "sentence_nr": 3 + "score": 0.4322938742789178, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.302221525161365, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.16292532792713388, - "sentence_nr": 3 + "score": 0.494647923435533, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.09881511482966875, - "sentence_nr": 3 + "score": 0.14239015212277228, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.3096045806359874, - "sentence_nr": 3 + "score": 0.39011057458940945, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.35423985843000033, - "sentence_nr": 3 + "score": 0.3312615528522037, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.449850771779881, - "sentence_nr": 3 + "score": 0.5044209915579331, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.5572806310452209, - "sentence_nr": 3 + "score": 0.379527516398695, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.8076383886663636, - "sentence_nr": 3 + "score": 0.5401652761721537, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.40214612768560637, - "sentence_nr": 3 + "score": 0.4063299618766256, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5573169779174251, - "sentence_nr": 3 + "score": 0.5674287431823071, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.3642301318550941, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.6103580099130892, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.2433923519922121, - "sentence_nr": 3 + "score": 0.2878333693524178, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.4163168327571102, - "sentence_nr": 3 + "score": 0.5587364816350978, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.08669514820036678, - "sentence_nr": 3 + "score": 0.2878333693524178, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.2499545634224168, - "sentence_nr": 3 + "score": 0.5587364816350978, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.44875232315488756, - "sentence_nr": 3 + "score": 0.2204887282071633, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.5111093822962448, - "sentence_nr": 3 + "score": 0.29000609768684643, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.3337430862067958, - "sentence_nr": 3 + "score": 0.4718372009351201, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.4389882441117941, - "sentence_nr": 3 + "score": 0.514371044868007, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.2167183006379162, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.22719782145384587, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.2770840545468946, - "sentence_nr": 3 + "score": 0.21061661601439866, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.41497301664586334, - "sentence_nr": 3 + "score": 0.25309156506322616, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2920066581890844, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.16291153978891829, - "sentence_nr": 3 + "score": 0.36052310235569734, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.2578557675410513, - "sentence_nr": 3 + "score": 0.3813114671801389, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.3636972258269605, - "sentence_nr": 3 + "score": 0.39681350602982185, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.4815092081725061, - "sentence_nr": 3 + "score": 0.3032929624979452, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.46880886343224853, - "sentence_nr": 3 + "score": 0.6298789412741047, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.2782546336574456, - "sentence_nr": 3 + "score": 0.3841997815718642, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.42327771190588576, - "sentence_nr": 3 + "score": 0.7084389853258415, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.39109158855739823, - "sentence_nr": 3 + "score": 0.3212854967972961, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.48247427218434713, - "sentence_nr": 3 + "score": 0.5882101692160411, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.37821486365532614, - "sentence_nr": 3 + "score": 0.2382939784364074, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.479249956424894, - "sentence_nr": 3 + "score": 0.5141279222953337, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.29580528518835375, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.44985936994013187, - "sentence_nr": 3 + "score": 0.46175494867931294, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.34783139668510527, - "sentence_nr": 3 + "score": 0.2460137257692754, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.44014965506542114, - "sentence_nr": 3 + "score": 0.49701225041511987, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.434975077577922, - "sentence_nr": 3 + "score": 0.11902001907030836, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.47044769122745783, - "sentence_nr": 3 + "score": 0.37006869202652387, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.3340174053783229, - "sentence_nr": 3 + "score": 0.09970617113450042, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.38192575633810405, - "sentence_nr": 3 + "score": 0.4157699695564606, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.25678404806291744, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4451755318490681, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.4070491655025483, - "sentence_nr": 3 + "score": 0.1691896235111172, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.42034007595588657, - "sentence_nr": 3 + "score": 0.4942911926258494, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.31029223530511546, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.20783608924923047, - "sentence_nr": 3 + "score": 0.49153218387646985, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.30250701195783797, - "sentence_nr": 3 + "score": 0.25748661016289676, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.3647049156958342, - "sentence_nr": 3 + "score": 0.5470575132556847, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.3931807596037881, - "sentence_nr": 3 + "score": 0.29041376174024985, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.3863308383621456, - "sentence_nr": 3 + "score": 0.44402718863249474, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 + "score": 0.37652399404345116, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.46641278921549706, - "sentence_nr": 3 + "score": 0.42944002472700066, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 + "score": 0.2833048493206313, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.46229900712285454, - "sentence_nr": 3 + "score": 0.43070982670283003, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.3917196589390866, - "sentence_nr": 3 + "score": 0.28061557610476007, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.4177131012054097, - "sentence_nr": 3 + "score": 0.560457154689433, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.40842567407749947, - "sentence_nr": 3 + "score": 0.34973466192007374, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.40069931216675575, - "sentence_nr": 3 + "score": 0.5437131659817407, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.3931807596037881, - "sentence_nr": 3 + "score": 0.21349332971500937, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.38977878583131187, - "sentence_nr": 3 + "score": 0.43940628135351467, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.3909779796089584, - "sentence_nr": 3 + "score": 0.2722704374402053, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.4552666486837008, - "sentence_nr": 3 + "score": 0.4630549375673026, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.39022736644855677, - "sentence_nr": 3 + "score": 0.2516546237169354, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.469958902604639, - "sentence_nr": 3 + "score": 0.4948297815905772, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.2862280732631754, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.37692927868434317, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.36343365059404575, - "sentence_nr": 3 + "score": 0.13254512146117509, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4025730604768124, - "sentence_nr": 3 + "score": 0.4538619157918892, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.18592370210838877, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.20237126199349462, - "sentence_nr": 3 + "score": 0.4365745598211089, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.2589451141492935, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.28851079284233655, - "sentence_nr": 3 + "score": 0.5931895310669659, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0842816865856957, - "sentence_nr": 3 + "score": 0.2560635441623709, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.2485953030231616, - "sentence_nr": 3 + "score": 0.4780977367484985, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.33626043540904865, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.21057401113505914, - "sentence_nr": 3 + "score": 0.538764562032814, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.08678345215657841, - "sentence_nr": 3 + "score": 0.2560635441623709, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.2461700872648841, - "sentence_nr": 3 + "score": 0.48856557052258637, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0847400567745194, - "sentence_nr": 3 + "score": 0.270465701330031, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.24521641593951496, - "sentence_nr": 3 + "score": 0.6186468607030233, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.08892786873926027, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.2461700872648841, - "sentence_nr": 3 + "score": 0.4257336921268292, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.20652562031481567, - "sentence_nr": 3 + "score": 0.38564750221750704, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.11755743200908036, - "sentence_nr": 3 + "score": 0.20022731236728245, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.31066419642253373, - "sentence_nr": 3 + "score": 0.42151488713496327, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3538049406567624, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.24865388866098986, - "sentence_nr": 3 + "score": 0.5074236666326171, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.01634489871441457, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.08581896038434547, - "sentence_nr": 3 + "score": 0.3353862189849067, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.06337708222967141, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.19580581055921326, - "sentence_nr": 3 + "score": 0.6098504982416948, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.09604819623436828, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.20999393339350345, - "sentence_nr": 3 + "score": 0.6548615789183071, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.15713284450781043, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.2936101635241035, - "sentence_nr": 3 + "score": 0.5395661538587241, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.26837663158331726, - "sentence_nr": 3 + "score": 0.15935678781360804, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.36155324846955933, - "sentence_nr": 3 + "score": 0.4521793911256492, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.18765818045694288, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.36437642049863195, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.3525678415060714, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.543942790381318, - "sentence_nr": 3 + "score": 0.3612915461819799, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.6676191940689508, - "sentence_nr": 3 + "score": 0.013618788145100353, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.8174079050545816, - "sentence_nr": 3 + "score": 0.08603830819648207, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.1671164029594426, - "sentence_nr": 3 + "score": 0.3014351160013074, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.43350115970032554, - "sentence_nr": 3 + "score": 0.3460021525794904, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.14054613281857953, - "sentence_nr": 3 + "score": 0.07403266639469167, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.31166159977702695, - "sentence_nr": 3 + "score": 0.14081073758628304, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.32836120166564164, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.39814417587130846, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.6449325609322236, - "sentence_nr": 3 + "score": 0.3353163646582782, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.25516833112291476, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.19882981891203355, - "sentence_nr": 3 + "score": 0.11679716931522291, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.39881128802168136, - "sentence_nr": 3 + "score": 0.3007876434578066, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.1569912440358649, - "sentence_nr": 3 + "score": 0.30300203366197276, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.233058107264253, - "sentence_nr": 3 + "score": 0.4326017575964093, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.22840134993478534, - "sentence_nr": 3 + "score": 0.23310335053954467, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.13733894353973466, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.2628097872401104, - "sentence_nr": 3 + "score": 0.524222022655068, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.4013786550577509, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.7311884014618186, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.3925121365052661, - "sentence_nr": 3 + "score": 0.17895706401541522, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.47722987146488, - "sentence_nr": 3 + "score": 0.468904416935942, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.21620508650490267, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.3140933570370138, - "sentence_nr": 3 + "score": 0.22221780818599915, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.15911783110981517, - "sentence_nr": 3 + "score": 0.14198034060416528, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.2929026229365629, - "sentence_nr": 3 + "score": 0.30810954638924765, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.11371681934875245, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.1736078338284175, - "sentence_nr": 3 + "score": 0.1958174339824296, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.42988105429544615, - "sentence_nr": 3 + "score": 0.17923344640485428, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.5765642529796587, - "sentence_nr": 3 + "score": 0.4857079343907557, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.17966302274629617, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.34343300645922903, - "sentence_nr": 3 + "score": 0.3532706340602244, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.10754999784156925, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.22467133896316752, - "sentence_nr": 3 + "score": 0.36722366675256907, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.29042767355630905, - "sentence_nr": 3 + "score": 0.3066439134043059, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3820449807327681, - "sentence_nr": 3 + "score": 0.6338650948810797, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.2494483948151807, - "sentence_nr": 3 + "score": 0.5452372526683829, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.2826410419631889, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.33386474883709644, - "sentence_nr": 3 + "score": 0.5425084953777174, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.3352430929236216, - "sentence_nr": 3 + "score": 0.28875537786443695, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.37654805257811624, - "sentence_nr": 3 + "score": 0.47629875969455604, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.37284875432797243, - "sentence_nr": 3 + "score": 0.11141706023396854, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.46330437039257283, - "sentence_nr": 3 + "score": 0.39859597782994555, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.14858713442145016, - "sentence_nr": 3 + "score": 0.2345085086564769, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.26570814480513566, - "sentence_nr": 3 + "score": 0.49068926722541634, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.3917196589390866, - "sentence_nr": 3 + "score": 0.7532851335428472, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.4937847892557821, - "sentence_nr": 3 + "score": 0.9428927186852146, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.3203759015876555, - "sentence_nr": 3 + "score": 0.6575909538628469, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.36881913952874323, - "sentence_nr": 3 + "score": 0.8936217179505701, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.3595597536132021, - "sentence_nr": 3 + "score": 0.5143395284275514, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.45538141531674614, - "sentence_nr": 3 + "score": 0.7123933129689966, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.7123666275414222, - "sentence_nr": 3 + "score": 0.142420821179659, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5423007037746893, - "sentence_nr": 3 + "score": 0.2037070724003624, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.08822666853435472, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.4332008456263299, - "sentence_nr": 3 + "score": 0.26516858757309264, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.5055542871350683, - "sentence_nr": 3 + "score": 0.4788683350246076, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.2001670147593021, - "sentence_nr": 3 + "score": 0.285255290869053, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.27873308142715214, - "sentence_nr": 3 + "score": 0.5150277853121517, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.3198803457323068, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.36570299791317035, - "sentence_nr": 3 + "score": 0.2918097135253056, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.2061927630692647, - "sentence_nr": 3 + "score": 0.2659411626151445, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.28624131346749065, - "sentence_nr": 3 + "score": 0.5505575080111372, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.47046185035490873, - "sentence_nr": 3 + "score": 0.36194410357208756, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.6292601141059937, - "sentence_nr": 3 + "score": 0.598992373933567, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.25983833013159885, - "sentence_nr": 3 + "score": 0.28892136400127594, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.3730064448362738, - "sentence_nr": 3 + "score": 0.4814801693556659, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.21849712035317273, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.3365794075261624, - "sentence_nr": 3 + "score": 0.2533807666144747, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.11902001907030836, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.2615385026948039, - "sentence_nr": 3 + "score": 0.3199679830665405, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.13733894353973466, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.22471476580349214, - "sentence_nr": 3 + "score": 0.24713133286179484, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.41546060026113085, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.5135071749227893, - "sentence_nr": 3 + "score": 0.5503431628224418, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.48195116293616075, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.629169893832561, - "sentence_nr": 3 + "score": 0.4965709608247565, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.14643937864373885, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.25912071818960414, - "sentence_nr": 3 + "score": 0.35395850928300787, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.15110567441923345, - "sentence_nr": 3 + "score": 0.5512586832695557, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.260101900069982, - "sentence_nr": 3 + "score": 0.6913832239874317, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3815680677485232, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.027170554671717636, - "sentence_nr": 3 + "score": 0.5272796429919636, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.16436148154531297, - "sentence_nr": 3 + "score": 0.5533750148426995, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.3023015988751768, - "sentence_nr": 3 + "score": 0.6820863084799114, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.0926947735155968, - "sentence_nr": 3 + "score": 0.18239350853723665, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.27152680319064787, - "sentence_nr": 3 + "score": 0.5454029225665398, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.22886565624051966, - "sentence_nr": 3 + "score": 0.4051110969828215, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.09769805815777928, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.2758478148913772, - "sentence_nr": 3 + "score": 0.2210196346762089, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.31461500509304885, - "sentence_nr": 3 + "score": 0.1460968145362594, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.4788526616838816, - "sentence_nr": 3 + "score": 0.4638747721441993, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.08047798253159327, - "sentence_nr": 3 + "score": 0.11653907453915101, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.3229186657362169, - "sentence_nr": 3 + "score": 0.3645009427553585, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.20241776365171346, - "sentence_nr": 3 + "score": 0.31718083000697217, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.18661962390369358, - "sentence_nr": 3 + "score": 0.2534148517150881, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.39133469696528195, - "sentence_nr": 3 + "score": 0.4516314349242959, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.15612566417343207, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.2768820552331134, - "sentence_nr": 3 + "score": 0.3498678433566786, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.19838325645746463, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.07229703035677075, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2726328932076192, - "sentence_nr": 3 + "score": 0.3953826692542057, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.015600197098512953, - "sentence_nr": 3 + "score": 0.32347674230893503, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.11414633188690328, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ny", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2637316604062493, - "sentence_nr": 3 + "score": 0.3581157136337445, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.15867077366552554, - "sentence_nr": 3 + "score": 0.08840994001530046, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2515767320274808, - "sentence_nr": 3 + "score": 0.44402726611329, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.21300958856758825, - "sentence_nr": 3 + "score": 0.424690156353477, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.10629625019345329, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2894452751925746, - "sentence_nr": 3 + "score": 0.38370990452048254, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.16368118043487423, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.2975281469582198, - "sentence_nr": 3 + "score": 0.41229278985309037, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.09992869870690119, - "sentence_nr": 3 + "score": 0.12231772169405124, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.2643750418939446, - "sentence_nr": 3 + "score": 0.4262272217222601, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.19787017233255969, - "sentence_nr": 3 + "score": 0.42709235715262367, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.18167706997684474, - "sentence_nr": 3 + "score": 0.15846019094564587, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.3278493462869521, - "sentence_nr": 3 + "score": 0.37619226030999653, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.12076200198832092, - "sentence_nr": 3 + "score": 0.21950630289046016, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.30895143622174265, - "sentence_nr": 3 + "score": 0.49771455694254846, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.14823537448657864, - "sentence_nr": 3 + "score": 0.26216883254914064, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.26063968470685284, - "sentence_nr": 3 + "score": 0.5516544064070883, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.14710052131359536, - "sentence_nr": 3 + "score": 0.4639491362210437, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.2539053433397206, - "sentence_nr": 3 + "score": 0.5584011888296856, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.33351685300508205, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.16217631902430296, - "sentence_nr": 3 + "score": 0.46858342159783783, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.25122013507115176, - "sentence_nr": 3 + "score": 0.13203551444458209, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.31099162791754675, - "sentence_nr": 3 + "score": 0.2799958478061631, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.5181212746323438, - "sentence_nr": 3 + "score": 0.26763155480958284, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.5589229357546774, - "sentence_nr": 3 + "score": 0.4828657076086828, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.21339518760946108, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.5295425318265925, - "sentence_nr": 3 + "score": 0.4004950435730618, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.27004391647712717, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.7155411017347171, - "sentence_nr": 3 + "score": 0.4084384842894723, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.5388058943574727, - "sentence_nr": 3 + "score": 0.2028736642487601, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.580739671272447, - "sentence_nr": 3 + "score": 0.5137897386967579, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.40214612768560637, - "sentence_nr": 3 + "score": 0.19696330878717075, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.4450640488450856, - "sentence_nr": 3 + "score": 0.36322038790342054, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.5191178952716458, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.542900586633014, - "sentence_nr": 3 + "score": 0.2543081796362572, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.7123666275414222, - "sentence_nr": 3 + "score": 0.1458629230810952, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.5365920629514802, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.6337747731257387, - "sentence_nr": 3 + "score": 0.1596451070396453, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.15005674613274578, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.6141900265782613, - "sentence_nr": 3 + "score": 0.4083997317231223, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.17571400908328694, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.15145444933520683, - "sentence_nr": 3 + "score": 0.4563418609671908, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.2826410419631889, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.3732494235741262, - "sentence_nr": 3 + "score": 0.29482283350371896, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.21333164424828907, - "sentence_nr": 3 + "score": 0.5278627722123207, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.32785783444873706, - "sentence_nr": 3 + "score": 0.6793045635533688, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.4825755887985002, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5834117627541725, - "sentence_nr": 3 + "score": 0.4291451835074721, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.4427274357129559, - "sentence_nr": 3 + "score": 0.341191474032336, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.48566583494323345, - "sentence_nr": 3 + "score": 0.4705433400943735, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.3215000448278979, - "sentence_nr": 3 + "score": 0.3387562718376491, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.408098151133905, - "sentence_nr": 3 + "score": 0.4907254093520437, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.11902001907030836, - "sentence_nr": 3 + "score": 0.14100024578768863, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.21131969135775222, - "sentence_nr": 3 + "score": 0.38811765582682867, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.13796620851017116, - "sentence_nr": 3 + "score": 0.42047387366535105, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.2991162156594185, - "sentence_nr": 3 + "score": 0.6070400562507011, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.48195116293616075, - "sentence_nr": 3 + "score": 0.4393160369685383, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.5672887153097735, - "sentence_nr": 3 + "score": 0.8033601095520951, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.27560016787236363, - "sentence_nr": 3 + "score": 0.28051550320162943, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.44835630608859733, - "sentence_nr": 3 + "score": 0.5856239602984719, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.419468515826214, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.7306819794111177, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.39880891978177824, - "sentence_nr": 3 + "score": 0.16953891950751387, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.4801322208759939, - "sentence_nr": 3 + "score": 0.4759361360667322, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.17528697866511064, - "sentence_nr": 3 + "score": 0.42638539122035973, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.2164069950016749, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.3582972930343776, - "sentence_nr": 3 + "score": 0.3979397609409676, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.30166452953199324, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.16312445849704404, - "sentence_nr": 3 + "score": 0.4363965906324271, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.11692421557173073, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.17349790295171436, - "sentence_nr": 3 + "score": 0.2780081083837946, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.12765317762547787, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.20795234713221633, - "sentence_nr": 3 + "score": 0.2742812949424711, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.1274806088188179, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.2180851489906356, - "sentence_nr": 3 + "score": 0.3621464793575014, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.1587094725737821, - "sentence_nr": 3 + "score": 0.31567959179972443, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.1920594666485555, - "sentence_nr": 3 + "score": 0.28374091032340426, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.22939440444042064, - "sentence_nr": 3 + "score": 0.04319276109068939, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.31240704409387926, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.16236645871179417, - "sentence_nr": 3 + "score": 0.5481365257229119, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4207168232226367, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.12285228762352728, - "sentence_nr": 3 + "score": 0.1475816455168176, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.2541335227987007, - "sentence_nr": 3 + "score": 0.5015755484727876, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.11581430177230231, - "sentence_nr": 3 + "score": 0.4154905708443904, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.13310877322785153, - "sentence_nr": 3 + "score": 0.2067973157835718, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.3615855225145535, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.39302513361762836, - "sentence_nr": 3 + "score": 0.14030763768586757, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.5136268735913038, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.7004219512590859, - "sentence_nr": 3 + "score": 0.25421938685023643, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.7183290415445132, - "sentence_nr": 3 + "score": 0.12568752960413668, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.3960970942970261, - "sentence_nr": 3 + "score": 0.15728631043405775, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.5421737755936655, - "sentence_nr": 3 + "score": 0.32952773497361293, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.2557308431384103, - "sentence_nr": 3 + "score": 0.08883879001303638, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.44020046133443264, - "sentence_nr": 3 + "score": 0.33545029773179097, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.13285178183204643, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.22408914992675832, - "sentence_nr": 3 + "score": 0.33120970279184225, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.5440766840557734, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.6486942806598895, - "sentence_nr": 3 + "score": 0.3429418991437785, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.47467913885027985, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.6102089900133376, - "sentence_nr": 3 + "score": 0.3054613864495082, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.32423566021380024, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.3773656939703193, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.5248351209923585, - "sentence_nr": 3 + "score": 0.21388747780355633, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.18082588865422486, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.14757601755321914, - "sentence_nr": 3 + "score": 0.5146867759852178, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.35974578964005544, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.6245567852541115, - "sentence_nr": 3 + "score": 0.21135930387565488, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.33809821343395446, - "sentence_nr": 3 + "score": 0.07849706978299485, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.3414464563275225, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.4182681167677125, - "sentence_nr": 3 + "score": 0.08114854252363812, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.43812558475283875, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.5153914304432097, - "sentence_nr": 3 + "score": 0.0912147892671242, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.3864572432237816, - "sentence_nr": 3 + "score": 0.40684061814045497, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.45154198433087883, - "sentence_nr": 3 + "score": 0.6155459970463403, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.2588173346314121, - "sentence_nr": 3 + "score": 0.5363590525255244, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.34966578410219157, - "sentence_nr": 3 + "score": 0.7030523716437119, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.2668730618874673, - "sentence_nr": 3 + "score": 0.4023605739267744, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.3629773991680274, - "sentence_nr": 3 + "score": 0.6645489548096619, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.4374062018435253, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.5403839144614929, - "sentence_nr": 3 + "score": 0.17443259739075218, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.43098807781921006, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.4998903441953072, - "sentence_nr": 3 + "score": 0.2756167943711338, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.16032342647337483, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.4026617981948598, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.45961131726156146, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2887138086538547, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.200532240765861, - "sentence_nr": 3 + "score": 0.6342291345998248, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.3655280950277252, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.1022763758993479, - "sentence_nr": 3 + "score": 0.20748131961458333, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.26825052055805815, - "sentence_nr": 3 + "score": 0.2716205232346228, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.18808242155433705, - "sentence_nr": 3 + "score": 0.37589902061551017, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.3220587874741547, - "sentence_nr": 3 + "score": 0.42554151277542873, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.1022763758993479, - "sentence_nr": 3 + "score": 0.20748131961458333, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.2693375138315219, - "sentence_nr": 3 + "score": 0.2716205232346228, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.10203198615804732, - "sentence_nr": 3 + "score": 0.4135171000263379, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.26393210689508945, - "sentence_nr": 3 + "score": 0.7050151549073953, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.09868209342887882, - "sentence_nr": 3 + "score": 0.42988105429544615, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.26393210689508945, - "sentence_nr": 3 + "score": 0.7577244658187771, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 3 + "score": 0.5366411241731205, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.26161040297988175, - "sentence_nr": 3 + "score": 0.825566494253596, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6976333495952621, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.09477675811896721, - "sentence_nr": 3 + "score": 0.8331572107884448, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4165530720734658, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.23696160551015213, - "sentence_nr": 3 + "score": 0.7027805129995731, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.4027788021844849, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6872835607174038, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.498704623570665, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.18245400323670688, - "sentence_nr": 3 + "score": 0.6478746389895599, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.12212865548711085, - "sentence_nr": 3 + "score": 0.41307323705325416, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.24707767933143832, - "sentence_nr": 3 + "score": 0.5785653391533346, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.10440864748411478, - "sentence_nr": 3 + "score": 0.5248587176134882, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.2671475795987059, - "sentence_nr": 3 + "score": 0.6664855309004869, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.08023149270718091, - "sentence_nr": 3 + "score": 0.35210829264331733, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.279740710493905, - "sentence_nr": 3 + "score": 0.5239651686730163, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.21330178332703942, - "sentence_nr": 3 + "score": 0.17729842264695017, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.4261888165527193, - "sentence_nr": 3 + "score": 0.4103582047611184, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.06534434987768793, - "sentence_nr": 3 + "score": 0.34895836374229405, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.27176767387111833, - "sentence_nr": 3 + "score": 0.4767378358574124, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.22739562220830448, - "sentence_nr": 3 + "score": 0.833078701050083, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.40909318589710897, - "sentence_nr": 3 + "score": 0.9482515348146272, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.19682432817897016, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.8958039312312598, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.21669634490560397, - "sentence_nr": 3 + "score": 0.9382091007325469, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.41348528734771456, - "sentence_nr": 3 + "score": 0.5805399561362194, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.5036806117103709, - "sentence_nr": 3 + "score": 0.4810464260105228, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.06708839685423082, - "sentence_nr": 3 + "score": 0.4500531895417844, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3186988593970526, - "sentence_nr": 3 + "score": 0.43027065541050147, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.4933292241270431, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5225247297523148, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0686555146522301, - "sentence_nr": 3 + "score": 0.5064127215831256, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.279526430463802, - "sentence_nr": 3 + "score": 0.6516332048338376, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.18786971228693808, - "sentence_nr": 3 + "score": 0.5212982931053122, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.35319015092357736, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.2134163469900347, - "sentence_nr": 3 + "score": 0.5822934956325967, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.16168270317308941, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.26228540738738376, - "sentence_nr": 3 + "score": 0.4592978565863154, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3855522725905196, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.305925215411119, - "sentence_nr": 3 + "score": 0.587260566914102, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.3174603493865962, - "sentence_nr": 3 + "score": 0.4426623526629488, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.465550295868511, - "sentence_nr": 3 + "score": 0.6368371029698285, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.23266541684590059, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.43542584213311014, - "sentence_nr": 3 + "score": 0.518761522736185, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.19650854773882592, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.25767850065044406, - "sentence_nr": 3 + "score": 0.5134302167765095, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.10808631609223593, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.24232742873756352, - "sentence_nr": 3 + "score": 0.518761522736185, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 + "score": 0.5129586382458503, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.46181900132004605, - "sentence_nr": 3 + "score": 0.7857394056399366, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.12874330508144843, - "sentence_nr": 3 + "score": 0.5106109398471469, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.27005885018903275, - "sentence_nr": 3 + "score": 0.7688046995197549, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.44778459441351737, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.7043336945393497, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.08893868599300617, - "sentence_nr": 3 + "score": 0.6535194995338728, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.2614872038666333, - "sentence_nr": 3 + "score": 0.8909391457425937, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6158161554766717, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.09235276591518521, - "sentence_nr": 3 + "score": 0.8568982835533138, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.12577829595095136, - "sentence_nr": 3 + "score": 0.6535194995338728, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.2463514312047734, - "sentence_nr": 3 + "score": 0.8909391457425937, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.25430316746203985, - "sentence_nr": 3 + "score": 0.5852187596735429, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.31361769699186176, - "sentence_nr": 3 + "score": 0.7147018027438421, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.8151678595510182, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.8873630455888943, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.41072675483179805, - "sentence_nr": 3 + "score": 0.8571061116877262, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5635589150380774, - "sentence_nr": 3 + "score": 0.906026511295714, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.4390960897971484, - "sentence_nr": 3 + "score": 0.3682241310101735, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.541742178821102, - "sentence_nr": 3 + "score": 0.5606184355158915, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.25430316746203985, - "sentence_nr": 3 + "score": 0.31684822717918226, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.3843264258161899, - "sentence_nr": 3 + "score": 0.4984008175596484, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.26832828828804234, - "sentence_nr": 3 + "score": 0.31684822717918226, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.4353421873417814, - "sentence_nr": 3 + "score": 0.41779931059703573, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.38216426442206797, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.3090705808198716, - "sentence_nr": 3 + "score": 0.25590356077469273, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.46344802441586025, - "sentence_nr": 3 + "score": 0.4786634069414725, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.41469320341466864, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.4761654595813381, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.6369102574048467, - "sentence_nr": 3 + "score": 0.3162730677141066, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.18676394386940107, - "sentence_nr": 3 + "score": 0.33514014117550306, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.22435510126054356, - "sentence_nr": 3 + "score": 0.3832378601967276, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.3514245731837287, - "sentence_nr": 3 + "score": 0.33488655580657256, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.534130899739072, - "sentence_nr": 3 + "score": 0.7293017619227443, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.3090705808198716, - "sentence_nr": 3 + "score": 0.5709867732881334, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.4997989608278053, - "sentence_nr": 3 + "score": 0.898461683304813, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.3552281813814547, - "sentence_nr": 3 + "score": 0.5009303657723724, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.5479990039688047, - "sentence_nr": 3 + "score": 0.7151387779856716, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.2042128370387497, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.34010033215004876, - "sentence_nr": 3 + "score": 0.4768283301747848, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.21617263218447194, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.39665284207892343, - "sentence_nr": 3 + "score": 0.5294365116477578, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.27449463298776555, - "sentence_nr": 3 + "score": 0.4115919883568686, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.35663228170028305, - "sentence_nr": 3 + "score": 0.32685141385924577, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.4810110961578451, - "sentence_nr": 3 + "score": 0.70370764261192, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.3493450883427864, - "sentence_nr": 3 + "score": 0.47331152665045856, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.5052914563530501, - "sentence_nr": 3 + "score": 0.8148863881905419, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.20352458275127414, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6347163227772256, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.3072646319682134, - "sentence_nr": 3 + "score": 0.2175445226496326, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5375887148749452, - "sentence_nr": 3 + "score": 0.4621999154927885, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3449754050963302, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.21468806635443977, - "sentence_nr": 3 + "score": 0.5521970778133709, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.16711337215358957, - "sentence_nr": 3 + "score": 0.20304086026162935, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.3104580254514014, - "sentence_nr": 3 + "score": 0.42692494747660203, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.264371505578968, - "sentence_nr": 3 + "score": 0.794834366062997, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.3692663913160793, - "sentence_nr": 3 + "score": 0.7747058711066555, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.794834366062997, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.40358627497421223, - "sentence_nr": 3 + "score": 0.7747058711066555, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.15604242268653643, - "sentence_nr": 3 + "score": 0.794834366062997, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.35440326623172935, - "sentence_nr": 3 + "score": 0.7747058711066555, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.2590924722191636, - "sentence_nr": 3 + "score": 0.28688236146427454, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.34962994893205634, - "sentence_nr": 3 + "score": 0.30588462336040045, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.20466509965242627, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.38543831682371826, - "sentence_nr": 3 + "score": 0.13421952443691573, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.1261312269838889, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.29273546001876816, - "sentence_nr": 3 + "score": 0.1672261435520376, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.1850334785266634, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.3538829072573592, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.4576280788857466, - "sentence_nr": 3 + "score": 0.22109333005198922, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.00625904491799232, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.042587313196008975, - "sentence_nr": 3 + "score": 0.165099903439123, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.25944320225692963, - "sentence_nr": 3 + "score": 0.3190887215348668, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.36740468766073175, - "sentence_nr": 3 + "score": 0.4863428828999999, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.15888556418399724, - "sentence_nr": 3 + "score": 0.5526778778205866, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.25359546478986267, - "sentence_nr": 3 + "score": 0.7267402732246275, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.2442195895839763, - "sentence_nr": 3 + "score": 0.4779289657345161, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.33985143707913, - "sentence_nr": 3 + "score": 0.6131584716544094, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.13536681105774234, - "sentence_nr": 3 + "score": 0.6298777848185599, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.29163125383681, - "sentence_nr": 3 + "score": 0.8784032559879915, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.6052987576779449, - "sentence_nr": 3 + "score": 0.4721001028256397, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.6809283802101068, - "sentence_nr": 3 + "score": 0.7096678127067526, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.29580528518835375, - "sentence_nr": 3 + "score": 0.4336560555138203, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.4152245863377912, - "sentence_nr": 3 + "score": 0.8096970872393391, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.29580528518835375, - "sentence_nr": 3 + "score": 0.285255290869053, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.4152245863377912, - "sentence_nr": 3 + "score": 0.40782828620425265, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.30128865413061245, - "sentence_nr": 3 + "score": 0.13072010980679707, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.3577177318823599, - "sentence_nr": 3 + "score": 0.3105434451119841, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.13796620851017113, - "sentence_nr": 3 + "score": 0.11967758950157248, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.2971316298359249, - "sentence_nr": 3 + "score": 0.2800855102887508, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.4772894233335957, - "sentence_nr": 3 + "score": 0.2320305803246989, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.623743257110184, - "sentence_nr": 3 + "score": 0.5875457251146547, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.3076805720186954, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.5322299798274237, - "sentence_nr": 3 + "score": 0.37453249276032596, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.13471766853689124, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.02967152553066799, - "sentence_nr": 3 + "score": 0.5276176357172093, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.4587032440161705, - "sentence_nr": 3 + "score": 0.2293648308471323, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5453529746009712, - "sentence_nr": 3 + "score": 0.4184099073786108, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3199526749905591, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.02629356643029618, - "sentence_nr": 3 + "score": 0.46625097311775787, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.2585740748062296, - "sentence_nr": 3 + "score": 0.43437903147819623, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.108043996762779, - "sentence_nr": 3 + "score": 0.37595662994657586, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.2627337195947467, - "sentence_nr": 3 + "score": 0.5278838809443244, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.18085702029043885, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.338987683983403, - "sentence_nr": 3 + "score": 0.4012179884342934, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.12829843029207522, - "sentence_nr": 3 + "score": 0.1751062735415077, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.2926652353247206, - "sentence_nr": 3 + "score": 0.48664097065107126, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 3 + "score": 0.5129484309843931, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.27948736250364437, - "sentence_nr": 3 + "score": 0.6268186944844665, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.10490894282140378, - "sentence_nr": 3 + "score": 0.4111336169005197, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.2705485193181514, - "sentence_nr": 3 + "score": 0.654151133443915, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.07861539293399739, - "sentence_nr": 3 + "score": 0.3911104256806521, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.29011304888643985, - "sentence_nr": 3 + "score": 0.7162899586182385, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.3044918933465557, - "sentence_nr": 3 + "score": 0.23114663823833642, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4630596968724663, - "sentence_nr": 3 + "score": 0.4894179053435348, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.13566979610140004, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.25686963328488466, - "sentence_nr": 3 + "score": 0.38699631006193164, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.1434533437144611, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4325933876557919, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.12829843029207522, - "sentence_nr": 3 + "score": 0.32282138800401855, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.2917663610295337, - "sentence_nr": 3 + "score": 0.5545363548814882, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.30702571862234085, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.1956251535458609, - "sentence_nr": 3 + "score": 0.5270937279585078, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.11092770141728163, - "sentence_nr": 3 + "score": 0.32282138800401855, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.26818571204274316, - "sentence_nr": 3 + "score": 0.5545363548814882, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2520126751139802, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.16516473320936778, - "sentence_nr": 3 + "score": 0.4174826402445743, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.11146727460890443, - "sentence_nr": 3 + "score": 0.28615556452106294, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.29428893607214085, - "sentence_nr": 3 + "score": 0.46306995992822714, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.10640850690356463, - "sentence_nr": 3 + "score": 0.2291421308389693, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.2838000569859586, - "sentence_nr": 3 + "score": 0.4574939601395284, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.10866043914193523, - "sentence_nr": 3 + "score": 0.2286960004658595, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.23638081554717555, - "sentence_nr": 3 + "score": 0.4746698507981419, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2898497051727735, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.15650329606801927, - "sentence_nr": 3 + "score": 0.4696277526071703, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4010889714538991, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.22598986047875458, - "sentence_nr": 3 + "score": 0.5073411865094402, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.2159287855104448, - "sentence_nr": 3 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.35640441228857384, - "sentence_nr": 3 + "score": 0.8915710595176098, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.1607479526054687, - "sentence_nr": 3 + "score": 0.8131122830944344, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.7124633354895817, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.8131122830944344, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2152724303952775, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.20326207399228274, - "sentence_nr": 3 + "score": 0.414196892394643, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.164176659908094, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.09954109933243607, - "sentence_nr": 3 + "score": 0.41455341963750286, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.11824658049755846, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.154567666908047, - "sentence_nr": 3 + "score": 0.3786431496602672, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.12987293870549732, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.28422427146147505, - "sentence_nr": 3 + "score": 0.3309820868794149, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.14849103164051436, - "sentence_nr": 3 + "score": 0.18931747781986427, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.30883024781428503, - "sentence_nr": 3 + "score": 0.3742105986611897, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.1934884374107349, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.40687776179631713, - "sentence_nr": 3 + "score": 0.23318982747725595, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.26598871140611724, - "sentence_nr": 3 + "score": 0.17210731663474327, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.4502776851570257, - "sentence_nr": 3 + "score": 0.39634356136021137, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.07984434410510546, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.2899871712096496, - "sentence_nr": 3 + "score": 0.3416664930284698, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.08096470168539781, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.29129043241292674, - "sentence_nr": 3 + "score": 0.24371084210185487, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.41165970065973745, - "sentence_nr": 3 + "score": 0.24769802565621082, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.5948213301983136, - "sentence_nr": 3 + "score": 0.4703337695910655, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.48485170206034506, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.22270385608874566, - "sentence_nr": 3 + "score": 0.6080349479704925, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.24671763489589052, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4549467666840918, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.10527834487585676, - "sentence_nr": 3 + "score": 0.36140837993649927, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.27391969966031443, - "sentence_nr": 3 + "score": 0.5149871064681609, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.34185025753461284, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.07858086105703722, - "sentence_nr": 3 + "score": 0.48686023881748675, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.10795293558053044, - "sentence_nr": 3 + "score": 0.3232534820704333, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.27042845399029614, - "sentence_nr": 3 + "score": 0.48025544762616995, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.24344044484205296, - "sentence_nr": 3 + "score": 0.3100943381862644, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.36944883808094725, - "sentence_nr": 3 + "score": 0.416673187106377, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.16436148154531297, - "sentence_nr": 3 + "score": 0.24835336815593242, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.3129244553794762, - "sentence_nr": 3 + "score": 0.3892570202624745, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.3545649986147617, - "sentence_nr": 3 + "score": 0.36880864175927275, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.4749542277669906, - "sentence_nr": 3 + "score": 0.46290260506783343, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.2517043491414655, - "sentence_nr": 3 + "score": 0.22759483012793272, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.37468110318084064, - "sentence_nr": 3 + "score": 0.7746087722406751, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.2753358028987337, - "sentence_nr": 3 + "score": 0.15821285888349262, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.4077441674615824, - "sentence_nr": 3 + "score": 0.5956452084834666, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.2250265947708922, - "sentence_nr": 3 + "score": 0.24065223308491276, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.38424599049509484, - "sentence_nr": 3 + "score": 0.7895361122068094, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.29113216578145623, - "sentence_nr": 3 + "score": 0.4883033428624341, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.42679430664357537, - "sentence_nr": 3 + "score": 0.7631727192799108, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.2618449527244832, - "sentence_nr": 3 + "score": 0.24895494253879688, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3726299296957476, - "sentence_nr": 3 + "score": 0.5641428028936958, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.47689738482638455, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6964624392461141, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.16467029855845897, - "sentence_nr": 3 + "score": 0.2509163300522903, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.3263521700764589, - "sentence_nr": 3 + "score": 0.40768446327474644, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.17034799410713272, - "sentence_nr": 3 + "score": 0.13738108161269025, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.28984287829971883, - "sentence_nr": 3 + "score": 0.3092490317326459, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.2721328088738599, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.3634848474258691, - "sentence_nr": 3 + "score": 0.2605680717060868, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.15326140415213751, - "sentence_nr": 3 + "score": 0.3964513253420688, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3238101789644524, - "sentence_nr": 3 + "score": 0.5920251300199506, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.18815571743190213, - "sentence_nr": 3 + "score": 0.22872196013470597, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.375522612679117, - "sentence_nr": 3 + "score": 0.5536738597162264, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.1543252261021413, - "sentence_nr": 3 + "score": 0.41584686600540666, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3572407559404224, - "sentence_nr": 3 + "score": 0.5942731412808763, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.19035778476657214, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.45948305812456913, - "sentence_nr": 3 + "score": 0.24524653576725172, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.09001463882570905, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.2264618820011072, - "sentence_nr": 3 + "score": 0.35606470318621675, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.14893983010707912, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.3389883503505506, - "sentence_nr": 3 + "score": 0.22131099289519415, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.20638636014941364, - "sentence_nr": 3 + "score": 0.12085824721993538, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.427660959032505, - "sentence_nr": 3 + "score": 0.3494963045761496, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.1572175759719851, - "sentence_nr": 3 + "score": 0.38068555329170634, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.3450580718329653, - "sentence_nr": 3 + "score": 0.4951270565975413, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.3046160524722292, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5006657499823403, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.8895260356363631, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.32441734031006125, - "sentence_nr": 3 + "score": 0.9602241494544458, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.9574533680683809, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.002360717658168083, - "sentence_nr": 3 + "score": 0.9651456696916643, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4772894233335957, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.2152691957249421, - "sentence_nr": 3 + "score": 0.6802859471885812, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.3352430929236216, - "sentence_nr": 3 + "score": 0.501310627231278, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.41922948177882463, - "sentence_nr": 3 + "score": 0.6497159142719684, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.2666090188234886, - "sentence_nr": 3 + "score": 0.5995278533842179, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.4310539878732571, - "sentence_nr": 3 + "score": 0.6317950587232596, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.4004456831424889, - "sentence_nr": 3 + "score": 0.4622377023605667, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.5299539464991493, - "sentence_nr": 3 + "score": 0.5631693837721754, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.3960970942970261, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.5007789387798656, - "sentence_nr": 3 + "score": 0.44621027303028116, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.37484914926654, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.4826432834392733, - "sentence_nr": 3 + "score": 0.5218244384185079, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.4369942407063455, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.46555087927121475, - "sentence_nr": 3 + "score": 0.38430061934674387, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.4374126034082234, - "sentence_nr": 3 + "score": 0.49614858773984216, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.4706011613514873, - "sentence_nr": 3 + "score": 0.6104981438934256, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.30778741582971547, - "sentence_nr": 3 + "score": 0.479859141564773, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.41913422458998256, - "sentence_nr": 3 + "score": 0.5641001468407324, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.3774814765934677, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4990116884547619, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.36210097004176117, - "sentence_nr": 3 + "score": 0.3623885503140912, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.422634223525565, - "sentence_nr": 3 + "score": 0.591097975732367, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.18207052811092134, - "sentence_nr": 3 + "score": 0.22116603634260015, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.2690092394312927, - "sentence_nr": 3 + "score": 0.46760899549728513, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.23715535229161464, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.35918995475524507, - "sentence_nr": 3 + "score": 0.2699836537150582, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.3398088489694245, - "sentence_nr": 3 + "score": 0.18556675930109115, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.4228308786458922, - "sentence_nr": 3 + "score": 0.5038073544487811, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.2927926577346015, - "sentence_nr": 3 + "score": 0.47396838435140404, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.4165527532106081, - "sentence_nr": 3 + "score": 0.6824513472106343, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.27073362211548463, - "sentence_nr": 3 + "score": 0.21951524426618454, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.3669195863456915, - "sentence_nr": 3 + "score": 0.4465396346138487, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.2608721373229356, - "sentence_nr": 3 + "score": 0.14965975078050625, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.3669195863456915, - "sentence_nr": 3 + "score": 0.4312085220554114, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.27073362211548463, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.3426098433314766, - "sentence_nr": 3 + "score": 0.35270822551668213, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.2310325762059593, - "sentence_nr": 3 + "score": 0.16731078418090023, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.3818848271995484, - "sentence_nr": 3 + "score": 0.3632099783723749, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.3140084866979345, - "sentence_nr": 3 - }, + "score": 0.0, + "sentence_nr": 5 + }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.4201283238904398, - "sentence_nr": 3 + "score": 0.5934737473820053, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.27668736912821895, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.3671983604767805, - "sentence_nr": 3 + "score": 0.5115509728992912, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.38358551832735843, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.2666090188234886, - "sentence_nr": 3 + "score": 0.37297693132140797, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.3625204192727816, - "sentence_nr": 3 + "score": 0.6079938395398949, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.16441798531606866, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.055888558015542704, - "sentence_nr": 3 + "score": 0.4740886351198963, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.24315949752483765, - "sentence_nr": 3 + "score": 0.1050176352370787, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.36212873179586813, - "sentence_nr": 3 + "score": 0.40362680831391573, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.10401577613691954, - "sentence_nr": 3 + "score": 0.501310627231278, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.17463781885740615, - "sentence_nr": 3 + "score": 0.625931526816525, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5995278533842179, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.27890809547716944, - "sentence_nr": 3 + "score": 0.6130905930623375, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.14068535649874328, - "sentence_nr": 3 + "score": 0.6781394283024478, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.21065537154817968, - "sentence_nr": 3 + "score": 0.7591411004037735, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.22291343499214064, - "sentence_nr": 3 + "score": 0.3247977183883475, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.29815406656323407, - "sentence_nr": 3 + "score": 0.5194224997049715, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.108043996762779, - "sentence_nr": 3 + "score": 0.3714775072696357, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.20592612474035338, - "sentence_nr": 3 + "score": 0.6057428008252769, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5504220378177658, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.09695871631707126, - "sentence_nr": 3 + "score": 0.7644237788882867, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 + "score": 0.23529269197764505, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.43266448575617944, - "sentence_nr": 3 + "score": 0.40809201669399076, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.11234905986715489, - "sentence_nr": 3 + "score": 0.14119598284201468, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.16108949081819493, - "sentence_nr": 3 + "score": 0.44173678486543566, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.235761566588968, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.368275540257269, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.4344880644474143, - "sentence_nr": 3 + "score": 0.32474347462386666, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.02072968490878939, - "sentence_nr": 3 + "score": 0.3814040834322927, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.09772992164303729, - "sentence_nr": 3 + "score": 0.1976684645555356, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.17471046691781456, - "sentence_nr": 3 + "score": 0.42459292222966755, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.09207598308796072, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.24553846741883023, - "sentence_nr": 3 + "score": 0.36760622066825005, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.21070435913784732, - "sentence_nr": 3 + "score": 0.24297293935529007, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.1163540245408256, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.2115841789715117, - "sentence_nr": 3 + "score": 0.20550295710786312, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.19264094072473242, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.3181645976891593, - "sentence_nr": 3 + "score": 0.26306143053016545, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.1328981075995534, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.28935468520358737, - "sentence_nr": 3 + "score": 0.3398238198012154, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.09518930981816905, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.19355453531805264, - "sentence_nr": 3 + "score": 0.23650173894542376, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.195647514979229, - "sentence_nr": 3 + "score": 0.5339293213731362, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.33945900257486206, - "sentence_nr": 3 + "score": 0.6871710646631124, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.11238170584522883, - "sentence_nr": 3 + "score": 0.34756561191481233, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.17264259150244354, - "sentence_nr": 3 + "score": 0.6088539252565636, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5257106921837311, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.12861981016228477, - "sentence_nr": 3 + "score": 0.17876827148517369, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.2805631135214855, - "sentence_nr": 3 + "score": 0.6129800176024777, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.34485299147725845, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.12183427844024919, - "sentence_nr": 3 + "score": 0.7179268969430285, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.15494432466984584, - "sentence_nr": 3 + "score": 0.5554063047350235, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.218134321293328, - "sentence_nr": 3 + "score": 0.7122051230572404, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.35541240647259675, - "sentence_nr": 3 + "score": 0.7814624520865153, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.19105600040048565, - "sentence_nr": 3 + "score": 0.8164795700243455, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.41505761608077835, - "sentence_nr": 3 + "score": 0.8966287960686459, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.2064597158958983, - "sentence_nr": 3 + "score": 0.3173673439991377, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.36557785420213534, - "sentence_nr": 3 + "score": 0.5749974286664644, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.3120848453730729, - "sentence_nr": 3 + "score": 0.3904439050342299, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.3751126030933485, - "sentence_nr": 3 + "score": 0.7513905732870811, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.1964771343560535, - "sentence_nr": 3 + "score": 0.6543503796047173, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.36076279170952025, - "sentence_nr": 3 + "score": 0.8630547387568517, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.19147265798368787, - "sentence_nr": 3 + "score": 0.31189530499557644, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.3652160842746311, - "sentence_nr": 3 + "score": 0.6178467600806551, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.43644602255194453, - "sentence_nr": 3 + "score": 0.501310627231278, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.4593205617863297, - "sentence_nr": 3 + "score": 0.6476336500357023, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.11729176379814876, - "sentence_nr": 3 + "score": 0.39363232658946995, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.34619114638131826, - "sentence_nr": 3 + "score": 0.6025455732366968, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.3981101537239463, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5160821572366358, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.2064597158958983, - "sentence_nr": 3 + "score": 0.6163736299428778, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.3640697123638358, - "sentence_nr": 3 + "score": 0.7230511644674203, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.18207052811092134, - "sentence_nr": 3 + "score": 0.5399438397704499, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.2771423653771131, - "sentence_nr": 3 + "score": 0.6978441364968362, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.2897029272018376, - "sentence_nr": 3 + "score": 0.789917682435982, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.41353605973777596, - "sentence_nr": 3 + "score": 0.8127487880440298, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.6743083618671843, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.26808424913615275, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.6185283127358252, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3643840101054471, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3227335009304841, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20019749510803492, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 + "score": 0.4550706657312225, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.2481076483975817, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.38933614220345447, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.7825422900366437, - "sentence_nr": 4 + "score": 0.3226647471194494, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.8503171627677965, - "sentence_nr": 4 + "score": 0.567577341013788, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 + "score": 0.4571698763985791, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 + "score": 0.6457903784448579, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.3467494262331134, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5076258119344784, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.2252707911092971, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3882698759114865, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.25381907882982485, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.2679490624827725, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19062118739377035, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.3561098113330048, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22576206373887328, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.43106919861749643, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.22215063102831487, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15004931283219156, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.43097718132573626, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.5581982021478125, - "sentence_nr": 4 + "score": 0.4902862637317829, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26184495272448327, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5568278384427537, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.36910468947138964, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4910950369957017, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.27057928415461924, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.18263245836368824, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13872638167626056, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.29708830117546026, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.17466448960963754, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26977779776316707, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.49546288984677567, - "sentence_nr": 4 + "score": 0.5502252143009858, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5618268005152494, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.43795381992037963, - "sentence_nr": 4 + "score": 0.6323271459296715, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33459343819025317, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 + "score": 0.5487027389895724, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.46670957224939175, - "sentence_nr": 4 + "score": 0.29699464782947244, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.37544324742239676, - "sentence_nr": 4 + "score": 0.32437584698730715, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.41602211217571683, - "sentence_nr": 4 + "score": 0.3269663027436998, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.33491517492026424, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.714838523727054, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.49713060327965375, - "sentence_nr": 4 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.440129802760994, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.16195570128532405, - "sentence_nr": 4 + "score": 0.3450219162509876, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.581645267684411, - "sentence_nr": 4 + "score": 0.3993348853061597, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5561195823338172, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.429292711066547, - "sentence_nr": 4 + "score": 0.5362935676066722, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.5803515898273521, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.5848202846227532, - "sentence_nr": 4 + "score": 0.5422220468910552, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2562150245540302, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.40854152133685306, - "sentence_nr": 4 + "score": 0.47046477830594896, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33438299066966715, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.41213231348812146, - "sentence_nr": 4 + "score": 0.5409759573191787, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37854068916316835, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.5743796566387722, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5521710658453207, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.34256683873776383, - "sentence_nr": 4 + "score": 0.7317828775912516, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.45506803308128024, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.41477028165511615, - "sentence_nr": 4 + "score": 0.6477506541284608, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.592313615748771, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.4164061298971701, - "sentence_nr": 4 + "score": 0.7382416555842614, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4018202851356865, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.42052258014181687, - "sentence_nr": 4 + "score": 0.6003256951549871, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4018202851356865, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5709936728721758, - "sentence_nr": 4 + "score": 0.6003256951549871, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.580451128369423, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.7542976177437886, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18207052811092134, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.40562290854898025, - "sentence_nr": 4 + "score": 0.4504432021668592, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37717457428685847, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.33546955366063214, - "sentence_nr": 4 + "score": 0.5554130492458337, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.40319099863003527, - "sentence_nr": 4 + "score": 0.31598923484911084, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6425503166524515, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.8078891929749037, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7629273292796576, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.8510385544954956, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.8725129388059689, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 + "score": 0.9495292423959529, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2147607499133801, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3818534926571001, - "sentence_nr": 4 + "score": 0.3976144917079093, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.49349163706233623, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.3644112480028862, - "sentence_nr": 4 + "score": 0.694445271037971, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3037643089519314, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.393379300802006, - "sentence_nr": 4 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183662698462751, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.3958941272081701, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.8253498772794055, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.3892064098781075, - "sentence_nr": 4 + "score": 0.8529564805429163, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7944837206494969, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.3066682918799934, - "sentence_nr": 4 + "score": 0.8784531740275225, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23887527917609022, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.3010381621698183, - "sentence_nr": 4 + "score": 0.5924993690004501, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5828833474188783, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.28783297914763095, - "sentence_nr": 4 + "score": 0.7908226509294533, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.38694317759010316, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.3958941272081701, - "sentence_nr": 4 + "score": 0.5953878513137957, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3009687072297843, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5948724602646328, - "sentence_nr": 4 + "score": 0.5341810386314462, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5040673596100225, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5042211795038526, - "sentence_nr": 4 + "score": 0.6469962279041276, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22816849039973935, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.49849908693271183, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.5138104164912963, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.395494817172382, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5049375875723539, - "sentence_nr": 4 + "score": 0.48210216762305635, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1794560313432444, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.5236301264596329, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16111212240349498, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5582360999449585, - "sentence_nr": 4 + "score": 0.5162765195160328, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23109536367862135, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.48375513642780327, - "sentence_nr": 4 + "score": 0.5693079918450474, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6486932415130529, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.4915933923809756, - "sentence_nr": 4 + "score": 0.788686710424071, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4252891537802403, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.41469341972645324, - "sentence_nr": 4 + "score": 0.6269243845872724, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6486932415130529, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.39451521279220947, - "sentence_nr": 4 + "score": 0.788686710424071, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.27447938256311044, - "sentence_nr": 4 + "score": 0.43937095446369234, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.615291848344044, - "sentence_nr": 4 + "score": 0.46181721677136944, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7490853969372642, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.5516607622642397, - "sentence_nr": 4 + "score": 0.7869453805471358, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4970449067437269, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 + "score": 0.5549084692917513, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3460178707517162, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.5256353512715748, - "sentence_nr": 4 + "score": 0.4777182579951077, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.46463179926223586, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.3765697091436241, - "sentence_nr": 4 + "score": 0.6658302112115018, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.31573558123189943, - "sentence_nr": 4 + "score": 0.30934066139296057, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.6989238098201116, - "sentence_nr": 4 + "score": 0.45568578346907496, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.22894156860669912, - "sentence_nr": 4 + "score": 0.41103851467561064, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.525025800664119, - "sentence_nr": 4 + "score": 0.5051296804607229, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.32375968335328725, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.6245952145297528, - "sentence_nr": 4 + "score": 0.4184987692475953, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26583802173257376, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.44995700110278536, - "sentence_nr": 4 + "score": 0.5022872142270257, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.2680165156355779, - "sentence_nr": 4 + "score": 0.3680806213583401, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.5989264158576341, - "sentence_nr": 4 + "score": 0.7106717690083967, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33573064840973227, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.4425650919372919, - "sentence_nr": 4 + "score": 0.6896084087753593, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 4 + "score": 0.3294509143325626, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.6263180162489238, - "sentence_nr": 4 + "score": 0.5502318837669161, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6102624546684577, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.33762297226992255, - "sentence_nr": 4 + "score": 0.7455382911279661, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6102624546684577, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.4576529535952892, - "sentence_nr": 4 + "score": 0.7455382911279661, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5129914365244811, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 + "score": 0.6719058099803666, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.4726395749383864, - "sentence_nr": 4 + "score": 0.42219590199110324, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18558816000270506, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.40052428191473877, - "sentence_nr": 4 + "score": 0.5082547077193023, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25104615680952314, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.39336600752225864, - "sentence_nr": 4 + "score": 0.5386684376521024, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35573896339264094, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5396070985289769, - "sentence_nr": 4 + "score": 0.43373844074577966, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4158130624959958, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.4101715667811344, - "sentence_nr": 4 + "score": 0.5675993368017754, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.034234536820051814, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5438653754915956, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.45834841871997833, - "sentence_nr": 4 + "score": 0.7302649909837065, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.391080327529236, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.31754227193241025, - "sentence_nr": 4 + "score": 0.6466158388142541, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4464617303464354, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.3974726419025883, - "sentence_nr": 4 + "score": 0.6719518780034501, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.2658483576665877, - "sentence_nr": 4 + "score": 0.47901455811287486, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.6410540990527072, - "sentence_nr": 4 + "score": 0.42929076670455985, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.3964023716675737, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.6374693500772332, - "sentence_nr": 4 + "score": 0.3718976854103865, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.3063889665164676, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 + "score": 0.3060606492644399, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.1714049169603588, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.686947433675709, - "sentence_nr": 4 + "score": 0.22748922852371126, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.4746119151171374, - "sentence_nr": 4 + "score": 0.21305956475594143, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4395285386678049, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.5639241776831634, - "sentence_nr": 4 + "score": 0.5439405004075281, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39022736644855677, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.5639241776831634, - "sentence_nr": 4 + "score": 0.6827449888069321, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5194247346787363, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.5281061979991509, - "sentence_nr": 4 + "score": 0.7440660671846263, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.4093184131170722, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6668099404219522, - "sentence_nr": 4 + "score": 0.6844082266550039, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24268235789067255, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.5554602680850725, - "sentence_nr": 4 + "score": 0.6128667394032248, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.41291750111233794, - "sentence_nr": 4 + "score": 0.4696150968541076, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 + "score": 0.22656720908801994, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.6053635787005981, - "sentence_nr": 4 + "score": 0.631245192197625, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2434623104231637, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.6562641136790542, - "sentence_nr": 4 + "score": 0.44265412961841627, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2882258334128601, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.46426595961938383, - "sentence_nr": 4 + "score": 0.4424981883607872, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2882258334128601, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.4424981883607872, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37257484093951504, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.5542570357348661, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21281701380712922, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.4806367958084579, - "sentence_nr": 4 + "score": 0.5028404616749624, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15295559337528836, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.5013632657267051, - "sentence_nr": 4 + "score": 0.5217846954711376, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.427700699409628, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5066311799500233, - "sentence_nr": 4 + "score": 0.6058578812402458, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4323312773819961, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.4224991954993499, - "sentence_nr": 4 + "score": 0.5997371746139766, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.49935841995256924, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5191362758854317, - "sentence_nr": 4 + "score": 0.7295897846667867, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29622254568370254, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.5379068753129642, - "sentence_nr": 4 + "score": 0.5178269067077038, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16373682488441257, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.3348758882377771, - "sentence_nr": 4 + "score": 0.5249159229096783, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29663479358211337, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.4601349893675622, - "sentence_nr": 4 + "score": 0.5048860636204477, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.5705717737418762, - "sentence_nr": 4 + "score": 0.5616963280794934, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.410846945789476, - "sentence_nr": 4 + "score": 0.4784684190704374, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 + "score": 0.5220744129850714, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.39913709020460375, - "sentence_nr": 4 + "score": 0.42656860819541537, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43846037099743423, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.40443357144012176, - "sentence_nr": 4 + "score": 0.5878523089199363, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3049156495560148, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4121946181418776, - "sentence_nr": 4 + "score": 0.41208793236807006, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6396679416047654, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.4455138569917551, - "sentence_nr": 4 + "score": 0.8130954821598096, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43504038103657183, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.42422145417131013, - "sentence_nr": 4 + "score": 0.6606468296022548, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3265516873506877, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 + "score": 0.5955775050987988, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25916413607200117, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.5606044053771457, - "sentence_nr": 4 + "score": 0.48625815319476634, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26608125176434144, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.40443357144012176, - "sentence_nr": 4 + "score": 0.4802266386987784, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2966090320349725, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.4121946181418776, - "sentence_nr": 4 + "score": 0.4660692265920593, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.19569434861872417, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.5843699612664682, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.6758978744760765, - "sentence_nr": 4 + "score": 0.19696533992407975, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.5293474685884572, - "sentence_nr": 4 + "score": 0.36270408031098256, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.23578316044531808, - "sentence_nr": 4 + "score": 0.7614976958373465, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.5821373704411671, - "sentence_nr": 4 + "score": 0.8451805980092224, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.14528679532351443, - "sentence_nr": 4 + "score": 0.8578928092681435, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.514952316880994, - "sentence_nr": 4 + "score": 0.8885860675423041, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7483293841345244, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.3128496839849598, - "sentence_nr": 4 + "score": 0.8285318099866168, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.4615425015629849, - "sentence_nr": 4 + "score": 0.287686670403962, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.7427658453867285, - "sentence_nr": 4 + "score": 0.5254267710927438, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1954598432860746, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.6371798394308665, - "sentence_nr": 4 + "score": 0.5357843357785438, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12050640089951394, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.30941048637024005, - "sentence_nr": 4 + "score": 0.46279589321598746, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3723091902601643, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.6423124418413864, - "sentence_nr": 4 + "score": 0.49708772465236206, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5196976495259397, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.2970314818988727, - "sentence_nr": 4 + "score": 0.5717810489005831, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24010981785257499, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.3292499962917628, - "sentence_nr": 4 + "score": 0.3038497620512514, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.3866478782027105, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.4410492519530161, - "sentence_nr": 4 + "score": 0.34136898121736164, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.4429196299668147, - "sentence_nr": 4 + "score": 0.37814378041895597, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12092350598744767, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.2611572619156785, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19422069098763883, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.47465074831919213, - "sentence_nr": 4 + "score": 0.3990712046437768, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.30688403446731133, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.40760363566240154, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5022374395202223, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.4384930065736907, - "sentence_nr": 4 + "score": 0.7482826302429207, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5696767062759219, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.47465074831919213, - "sentence_nr": 4 + "score": 0.7243872738436123, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5418739057997506, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.4373156210032521, - "sentence_nr": 4 + "score": 0.7260522735532127, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.44830378475308, - "sentence_nr": 4 + "score": 0.36168074485009266, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.2623399284064729, - "sentence_nr": 4 + "score": 0.2876470433414085, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.4373156210032521, - "sentence_nr": 4 + "score": 0.3597009865964047, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6496072343867388, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.7860533539734559, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5049567629086866, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.3844263765000694, - "sentence_nr": 4 + "score": 0.6875175437591383, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5856596027429395, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.6515526423776998, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.44363569462622243, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3805770883173698, - "sentence_nr": 4 + "score": 0.7152578718641567, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43527713779415106, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.6492012081783873, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4239615629447832, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.6750973651130794, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.28770035113017345, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.39818525322365445, - "sentence_nr": 4 + "score": 0.4824112481129573, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27876286341624873, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.3844263765000694, - "sentence_nr": 4 + "score": 0.47006631105227237, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27876286341624873, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.4719699809379796, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7288605134576496, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.8188207446751512, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6725157402359803, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.7771536167828895, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.9067110266941047, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.39818525322365445, - "sentence_nr": 4 + "score": 0.9136710157809076, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.3511161927593497, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.44838119776698643, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.33088444297120406, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.6474126202050918, - "sentence_nr": 4 + "score": 0.4331966918623851, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.16570229441828108, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.5183282721440023, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.7899968492664415, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4929664394953523, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.5521590062829653, - "sentence_nr": 4 + "score": 0.7163611428663252, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.419793811546288, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.6131017059052001, - "sentence_nr": 4 + "score": 0.7030875221285298, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.4071651792954847, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.5696223051382897, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3162277660168379, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.6131017059052001, - "sentence_nr": 4 + "score": 0.5598397806456028, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.35870004213153, - "sentence_nr": 4 + "score": 0.5087423866211807, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.697021248528644, - "sentence_nr": 4 + "score": 0.6733471820130715, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.6131017059052001, - "sentence_nr": 4 + "score": 0.25797975216980157, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.1423071532720465, - "sentence_nr": 4 + "score": 0.16727613178248177, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.5673078468780355, - "sentence_nr": 4 + "score": 0.3242750412187524, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.19923405658137924, - "sentence_nr": 4 + "score": 0.11856660123276004, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.6211036406023237, - "sentence_nr": 4 + "score": 0.24749625714009152, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.4637878319059324, - "sentence_nr": 4 + "score": 0.45242529056256514, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.6919476196061328, - "sentence_nr": 4 + "score": 0.6590255708265333, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.3446073377034663, - "sentence_nr": 4 + "score": 0.29591590531645884, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.7621696379946562, - "sentence_nr": 4 + "score": 0.6473269347851542, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.3237722713145643, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.7426638026175545, - "sentence_nr": 4 + "score": 0.3789185999473403, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.37030468338190614, - "sentence_nr": 4 + "score": 0.9000001338525341, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.7587397825317436, - "sentence_nr": 4 + "score": 0.9362876355864519, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7151826245402508, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.545311114945696, - "sentence_nr": 4 + "score": 0.7968074166631015, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.9000001338525341, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.6120635842558794, - "sentence_nr": 4 + "score": 0.9362876355864519, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.5303624596095554, - "sentence_nr": 4 + "score": 0.18157374168582124, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.7835371347721495, - "sentence_nr": 4 + "score": 0.3223976983915171, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22215846062674394, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.5658596262915636, - "sentence_nr": 4 + "score": 0.4414547021536916, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.3082642374230144, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.45792726647338766, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.1457684614972261, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.5701800421590155, - "sentence_nr": 4 + "score": 0.3035835725488823, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.1711057433668069, - "sentence_nr": 4 + "score": 0.31652994594299083, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.1531682455208201, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.6009917293478183, - "sentence_nr": 4 + "score": 0.27254857143793515, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2698809072033866, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.13369377363079382, - "sentence_nr": 4 + "score": 0.4390426411234037, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.25947507140745757, - "sentence_nr": 4 + "score": 0.13952118378975725, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.6659437947666702, - "sentence_nr": 4 + "score": 0.4232954460226121, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.49342175914364256, - "sentence_nr": 4 + "score": 0.23979583131036644, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19120817575042512, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.4754189767029448, - "sentence_nr": 4 + "score": 0.4557967473618706, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.46847165370535515, - "sentence_nr": 4 + "score": 0.38977903534687897, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.2784899880299974, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.6722683601585776, - "sentence_nr": 4 + "score": 0.24340568946986127, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.49124158433111575, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.5310543174340693, - "sentence_nr": 4 + "score": 0.6549405194208391, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.293597382795084, - "sentence_nr": 4 + "score": 0.12861981016228477, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.6760199025405591, - "sentence_nr": 4 + "score": 0.48957545062365976, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2698809072033866, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.48825277132758194, - "sentence_nr": 4 + "score": 0.5545874741798575, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.27571859863660825, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.743408011301782, - "sentence_nr": 4 + "score": 0.4329139718650503, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.4088276333455685, - "sentence_nr": 4 + "score": 0.3585323035168672, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2592494576098376, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.4745035227847713, - "sentence_nr": 4 + "score": 0.5573927865354532, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.18842393723950338, - "sentence_nr": 4 + "score": 0.3769395725981285, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.5854975500881314, - "sentence_nr": 4 + "score": 0.512767465109827, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.195647514979229, - "sentence_nr": 4 + "score": 0.3769395725981285, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.5725643788499303, - "sentence_nr": 4 + "score": 0.512767465109827, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.38269604012326863, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.4352628824108997, - "sentence_nr": 4 + "score": 0.5200002010352563, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14852672034924091, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.3620843366588185, - "sentence_nr": 4 + "score": 0.3094792226745253, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21787417256423566, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.3550428472545064, - "sentence_nr": 4 + "score": 0.3876689443198751, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.327643461630417, - "sentence_nr": 4 + "score": 0.2801269471205747, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.20149416157064579, - "sentence_nr": 4 + "score": 0.2703977722123393, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.5931139543094289, - "sentence_nr": 4 + "score": 0.44841562670836715, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3939492854375959, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.3782180895945298, - "sentence_nr": 4 + "score": 0.5897931522810166, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.2881761264465535, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5171031329358884, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14577432272792737, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.507081939944787, - "sentence_nr": 4 + "score": 0.3485312345680104, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.16319679661526076, - "sentence_nr": 4 + "score": 0.26329599886727373, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23278028502053263, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.3474927845768493, - "sentence_nr": 4 + "score": 0.5035438414330208, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0.22179945921983923, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.6249971903914197, - "sentence_nr": 4 + "score": 0.23256783267618808, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.5753050684342109, - "sentence_nr": 4 + "score": 0.2503888994539979, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.5116862201536014, - "sentence_nr": 4 + "score": 0.29117376371232984, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.22131477988685871, - "sentence_nr": 4 + "score": 0.1959572889882663, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.6188310784475567, - "sentence_nr": 4 + "score": 0.47526287474947615, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19427446513842178, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.49289897908980135, - "sentence_nr": 4 + "score": 0.3989194575281647, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.514391848002756, - "sentence_nr": 4 + "score": 0.23317448188282916, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26667143002371785, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5141209056236068, - "sentence_nr": 4 + "score": 0.472538453999306, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35138749399652214, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.5121650809135759, - "sentence_nr": 4 + "score": 0.4696291825462069, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16731078418090023, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.6189674633089594, - "sentence_nr": 4 + "score": 0.4141738376060544, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.30778741582971547, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.6248961527161889, - "sentence_nr": 4 + "score": 0.5130815842691727, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6347346879982754, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.3555531255203411, - "sentence_nr": 4 + "score": 0.8012052767784481, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4044405568460044, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.5116862201536014, - "sentence_nr": 4 + "score": 0.6621599148868252, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6848075777090852, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.33494612818381275, - "sentence_nr": 4 + "score": 0.7973909673112908, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5406697535191133, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.520472515533923, - "sentence_nr": 4 + "score": 0.669004783760823, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.8172480674634086, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.33471616336068044, - "sentence_nr": 4 + "score": 0.8884449620958097, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5792652217448795, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.3198143076622585, - "sentence_nr": 4 + "score": 0.6407626172619735, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.44411712310948115, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.25944035160413503, - "sentence_nr": 4 + "score": 0.6797128514623865, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5100759298724028, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.38085857828188696, - "sentence_nr": 4 + "score": 0.5779765318107316, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4317853842116786, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.4558620539723005, - "sentence_nr": 4 + "score": 0.5210207497435424, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.38637605442040596, - "sentence_nr": 4 + "score": 0.3010103123092385, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.33667089470100775, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.46112487179211115, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39930425989686696, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.4054892447711709, - "sentence_nr": 4 + "score": 0.6331705656079759, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10332090908268508, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.172700810315234, - "sentence_nr": 4 + "score": 0.38232840987382055, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.33494612818381275, - "sentence_nr": 4 + "score": 0.261609050082693, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.28977907494497107, - "sentence_nr": 4 + "score": 0.14198465334075994, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.6663117339552681, - "sentence_nr": 4 + "score": 0.41732035784825816, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.2961516536011624, - "sentence_nr": 4 + "score": 0.2620499195763038, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.7355780986981637, - "sentence_nr": 4 + "score": 0.45536529760795086, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.2865612242047131, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.6433813179203622, - "sentence_nr": 4 + "score": 0.398116228128051, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.26518122980477765, - "sentence_nr": 4 + "score": 0.5295736590451592, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6541173886447416, - "sentence_nr": 4 + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7304847791671485, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.26220676436185975, - "sentence_nr": 4 + "score": 0.2513073726775429, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.646323175287155, - "sentence_nr": 4 + "score": 0.46777360451268357, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.6245566175148537, - "sentence_nr": 4 + "score": 0.29218790157077157, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.17869133709232915, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.3044932648594964, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1748462645190237, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.6245566175148537, - "sentence_nr": 4 + "score": 0.3066192361421124, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.44261439814445486, - "sentence_nr": 4 + "score": 0.17832499439115537, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37164490436142156, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.6285229436299309, - "sentence_nr": 4 + "score": 0.5019119409563002, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.29254488484029956, - "sentence_nr": 4 + "score": 0.2608352075029667, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.3040559696901293, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.6658994383739726, - "sentence_nr": 4 + "score": 0.2756134147603395, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.4276859054768592, - "sentence_nr": 4 + "score": 0.40663529259554226, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.124420945512177, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.43620605921972144, - "sentence_nr": 4 + "score": 0.3252636443012772, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.3598792258309727, - "sentence_nr": 4 + "score": 0.3434676785325562, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14383482326455638, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.3258323649865128, - "sentence_nr": 4 + "score": 0.4802954812345209, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10375099461814634, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.3209186108619747, - "sentence_nr": 4 + "score": 0.37536190411744585, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.45100597619813854, - "sentence_nr": 4 + "score": 0.33140473433925555, + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.6245566175148537, - "sentence_nr": 4 + "score": 0.10066789757060902, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21673270446328768, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.6245566175148537, - "sentence_nr": 4 + "score": 0.22918023240505858, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09279771067975602, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.42278121257828405, - "sentence_nr": 4 + "score": 0.13256098526803148, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2310325762059593, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.5459722940273104, - "sentence_nr": 4 + "score": 0.4527304959287235, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4389321784429702, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.22011791783839232, - "sentence_nr": 4 + "score": 0.5672441723286317, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21488309480401357, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.6212521406814923, - "sentence_nr": 4 + "score": 0.49730166693272293, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.4813598669606701, - "sentence_nr": 4 + "score": 0.3010782087051575, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.5343147728119615, - "sentence_nr": 4 + "score": 0.23176692681985775, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.5125809225356253, - "sentence_nr": 4 + "score": 0.25533887786336174, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.44711013370113256, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.5125809225356253, - "sentence_nr": 4 + "score": 0.7319347493436125, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.49023502313124495, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.7638414724136195, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4424906782646928, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.47825370157575003, - "sentence_nr": 4 + "score": 0.705507971295129, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21326369102393236, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.47825370157575003, - "sentence_nr": 4 + "score": 0.24781828193168487, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07860105393900486, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.4935153102946312, - "sentence_nr": 4 + "score": 0.09678377693633947, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11601141307045003, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.47825370157575003, - "sentence_nr": 4 + "score": 0.21671187566850864, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2577716972449781, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.47825370157575003, - "sentence_nr": 4 + "score": 0.5171901208397282, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35015224715252113, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.4646331830467803, - "sentence_nr": 4 + "score": 0.5701648579139658, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3349252032650068, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.5063020142455625, - "sentence_nr": 4 + "score": 0.5908087431574293, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3274016883618531, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.5963099883424426, - "sentence_nr": 4 + "score": 0.5570399656004248, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3449058130015412, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.6245566175148537, - "sentence_nr": 4 + "score": 0.5365619830343804, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29688845677442144, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 + "score": 0.5494319015457763, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.207314191412716, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 + "score": 0.4360555836773355, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25376032254696296, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.5633410521280906, - "sentence_nr": 4 + "score": 0.5334329403985332, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20039141607873007, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.6265140753983048, - "sentence_nr": 4 + "score": 0.36123312088832493, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3235473265529593, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.6245566175148537, - "sentence_nr": 4 + "score": 0.5441122251341168, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.32707695373369694, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.6265140753983048, - "sentence_nr": 4 + "score": 0.5166643606783462, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3069937936246452, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.5963099883424426, - "sentence_nr": 4 + "score": 0.5024648105961349, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26513488970168847, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.6265140753983048, - "sentence_nr": 4 + "score": 0.6029932145447834, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3927237741677927, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.27979820860022203, - "sentence_nr": 4 + "score": 0.7451438087039315, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5570357635362685, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.6056889168578378, - "sentence_nr": 4 + "score": 0.8116469942298856, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.2102369368326755, - "sentence_nr": 4 + "score": 0.3860973950960897, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.5768887726639784, - "sentence_nr": 4 + "score": 0.6271680934322363, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3826576187198625, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.5326397959358325, - "sentence_nr": 4 + "score": 0.6071841372061269, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3447241447679157, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.5226572946586268, - "sentence_nr": 4 + "score": 0.5531085140985558, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27710310401156996, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5226572946586268, - "sentence_nr": 4 + "score": 0.5216248191624099, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31128635710849173, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5210387656594864, - "sentence_nr": 4 + "score": 0.6304411194127884, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13308561809919006, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.48085787079671877, - "sentence_nr": 4 + "score": 0.5312476702183977, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20475739007221866, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.48299229988531533, - "sentence_nr": 4 + "score": 0.3934874462686164, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.2102369368326755, - "sentence_nr": 4 + "score": 0.2879556779114461, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.5876589559170592, - "sentence_nr": 4 + "score": 0.4554184077174173, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09578921953028982, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.5309753107573227, - "sentence_nr": 4 + "score": 0.40472887922389433, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4730742700342366, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5309753107573227, - "sentence_nr": 4 + "score": 0.6716818492415609, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.38729516708438194, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.4814496481274858, - "sentence_nr": 4 + "score": 0.6571482446395243, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.195647514979229, - "sentence_nr": 4 + "score": 0.32995628251235876, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5857714957546027, - "sentence_nr": 4 + "score": 0.5982616321404195, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20057225201358211, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.4877445613866086, - "sentence_nr": 4 + "score": 0.47576367606491715, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3665528144045068, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5073395824633415, - "sentence_nr": 4 + "score": 0.5966563047685359, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2587297749908005, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5073395824633415, - "sentence_nr": 4 + "score": 0.5132023401682766, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3151295371556651, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5054194884603328, - "sentence_nr": 4 + "score": 0.6242597159052685, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29894673648596126, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.49233042976388086, - "sentence_nr": 4 + "score": 0.6322815922673689, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18330256089173447, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.6171472238624475, - "sentence_nr": 4 + "score": 0.5328391139635578, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5295672450222603, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5079994737492071, - "sentence_nr": 4 + "score": 0.715157413474444, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.7416488036617811, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5079994737492071, - "sentence_nr": 4 + "score": 0.9156308978596118, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2924496936199556, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5217233763439505, - "sentence_nr": 4 + "score": 0.6040522123603048, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2995728063785384, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.4877445613866086, - "sentence_nr": 4 + "score": 0.41824297302824903, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23119301671666287, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.31122692088261866, - "sentence_nr": 4 + "score": 0.354782287640505, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20419333453691463, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.5362380779055197, - "sentence_nr": 4 + "score": 0.39470297247688435, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2356808688936941, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.4148097947848928, - "sentence_nr": 4 + "score": 0.6055769871534641, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26230609318615344, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.33709347944719925, - "sentence_nr": 4 + "score": 0.5822825437641462, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3131514736435596, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.29382595610734974, - "sentence_nr": 4 + "score": 0.5852253917268062, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.34259841795512924, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.31966312198190094, - "sentence_nr": 4 + "score": 0.585466128440694, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22551665711721375, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.3095023687399762, - "sentence_nr": 4 + "score": 0.5279805413050443, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1565711240691367, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.39257815659454015, - "sentence_nr": 4 + "score": 0.5086519720619103, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1314770573188977, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.33709347944719925, - "sentence_nr": 4 + "score": 0.42247455964888564, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08905591495817854, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.4148097947848928, - "sentence_nr": 4 + "score": 0.39258991796140724, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.16839298730242253, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3815455050057558, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2058592651002273, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.4148097947848928, - "sentence_nr": 4 + "score": 0.4968187716376065, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.34140765467102613, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.31805405607794895, - "sentence_nr": 4 + "score": 0.6319466141042981, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22592501932697184, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.3195891679001926, - "sentence_nr": 4 + "score": 0.4846944833505256, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.2682764485524619, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.7180407770761651, - "sentence_nr": 4 + "score": 0.5066876769170529, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.3222538601891173, - "sentence_nr": 4 + "score": 0.24572492027154266, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.7495871587703783, - "sentence_nr": 4 + "score": 0.5262167453128139, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.1809849704111973, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.5773664661124461, - "sentence_nr": 4 + "score": 0.5183477609966307, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.6732778877516836, - "sentence_nr": 4 + "score": 0.3348767210373086, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.3146660996956415, - "sentence_nr": 4 + "score": 0.11543246428739175, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.774919653861933, - "sentence_nr": 4 + "score": 0.3867652322513648, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.45984464012364756, - "sentence_nr": 4 + "score": 0.1752510532254824, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.38762928139075753, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.6474126202050918, - "sentence_nr": 4 + "score": 0.5446552031353901, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2574866101628968, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.6277082350099422, - "sentence_nr": 4 + "score": 0.5192222793733214, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.4165851519399544, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5626264040219835, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.3222538601891173, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.7495871587703783, - "sentence_nr": 4 + "score": 0.18968668542064138, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.3005283910333271, - "sentence_nr": 4 + "score": 0.2062407699777119, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.2467789409989967, - "sentence_nr": 4 + "score": 0.1636538054009315, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.702397320872287, - "sentence_nr": 4 + "score": 0.27030640495259156, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31172573638980083, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.3437729074300146, - "sentence_nr": 4 + "score": 0.45166421390889583, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2905178491524662, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.30577290788405437, - "sentence_nr": 4 + "score": 0.4540869803464816, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4416037594311788, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.3053963874050995, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.3025031428331747, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.3038662664425978, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.2982854478221892, - "sentence_nr": 4 + "score": 0.5926552552596459, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4286493815549817, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.34100189594952895, - "sentence_nr": 4 + "score": 0.608929454341427, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5441799557959847, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.3421048582632637, - "sentence_nr": 4 + "score": 0.7471708187115776, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2737674171768533, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.31725906238707696, - "sentence_nr": 4 + "score": 0.49294682774777393, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24691200113491704, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.3355517973989557, - "sentence_nr": 4 + "score": 0.594269344169987, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.36092600834422417, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.3329758884511854, - "sentence_nr": 4 + "score": 0.6496714079703786, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2322365386782331, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.3437729074300146, - "sentence_nr": 4 + "score": 0.6044384701497908, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.14962848372546667, - "sentence_nr": 4 + "score": 0.15435869219704992, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.5531110803538978, - "sentence_nr": 4 + "score": 0.44156603809589606, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21605302662107204, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.4660343508894544, - "sentence_nr": 4 + "score": 0.5282568801368064, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26458538438609513, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.43340932146378, - "sentence_nr": 4 + "score": 0.5387766932929448, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09757710283079672, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.4169735477570989, - "sentence_nr": 4 + "score": 0.41975185347384797, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07228800742761465, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.43189251696918196, - "sentence_nr": 4 + "score": 0.3524948213332762, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.41934944341336317, - "sentence_nr": 4 + "score": 0.3972317449044497, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4516989732837159, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5242381673349054, - "sentence_nr": 4 + "score": 0.5415798785310697, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3972301056942388, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.5725197387086665, - "sentence_nr": 4 + "score": 0.5673240077710876, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27691593390925684, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.4218058951037998, - "sentence_nr": 4 + "score": 0.397254897780366, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29808592153996283, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.4817075905641231, - "sentence_nr": 4 + "score": 0.5278034227464174, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37566372103146395, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2075521577117978, - "sentence_nr": 4 + "score": 0.5970100838075844, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.15980518115118317, - "sentence_nr": 4 + "score": 0.37989456711162206, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "kn", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.6117406545411793, - "sentence_nr": 4 + "score": 0.5769558960935135, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ml", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.20038908500140973, - "sentence_nr": 4 + "score": 0.19943370880401756, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ml", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.6177327642561014, - "sentence_nr": 4 + "score": 0.5118140585073162, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ml", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.21258844131063828, - "sentence_nr": 4 + "score": 0.5136268735913038, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ml", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.6314891370223008, - "sentence_nr": 4 + "score": 0.747250567370702, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ml", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.15821285888349254, - "sentence_nr": 4 + "score": 0.5269786261674732, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ml", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.6605676082065987, - "sentence_nr": 4 + "score": 0.7211839703961767, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09697472565363051, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.5352059639541527, - "sentence_nr": 4 + "score": 0.3745932362705387, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18473000217570404, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4817621003925206, - "sentence_nr": 4 + "score": 0.4760100031203425, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.18710158230410626, - "sentence_nr": 4 + "score": 0.18239336435292994, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.6827304460872186, - "sentence_nr": 4 + "score": 0.5029345390359463, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25259068609065677, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.5359775791756516, - "sentence_nr": 4 + "score": 0.5384891408722134, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24887831362342314, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.5220823443002603, - "sentence_nr": 4 + "score": 0.5200881906842448, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.18842393723950338, - "sentence_nr": 4 + "score": 0.24135344239900156, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.5761879138789098, - "sentence_nr": 4 + "score": 0.546651751367433, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.20038908500140973, - "sentence_nr": 4 + "score": 0.09561039442149676, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.6177327642561014, - "sentence_nr": 4 + "score": 0.410158328815622, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11290965451231479, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.44210435496259043, - "sentence_nr": 4 + "score": 0.4125077608038444, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.3807134866446316, - "sentence_nr": 4 + "score": 0.09225101275784285, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.6734021595321634, - "sentence_nr": 4 + "score": 0.37350112612982594, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3471239643495614, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.5873831965245108, - "sentence_nr": 4 + "score": 0.5206611437816837, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08961889092065865, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.5700887051433648, - "sentence_nr": 4 + "score": 0.3874168292468809, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08100859619233457, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.5873831965245108, - "sentence_nr": 4 + "score": 0.24451849449561322, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27103337618153645, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.47099274965068205, - "sentence_nr": 4 + "score": 0.5640525084856031, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.384089730116398, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.41805694116981745, - "sentence_nr": 4 + "score": 0.745723691136233, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.32244421248930727, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.3885961889310864, - "sentence_nr": 4 + "score": 0.647615627796667, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.5719181187428595, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.9087119657256413, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.5599655154601001, - "sentence_nr": 4 + "score": 0.9560455759500431, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.5719181187428595, - "sentence_nr": 4 + "score": 0.019979749853566316, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16655520792269457, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.16432863675964413, - "sentence_nr": 4 + "score": 0.2978831898624651, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.558984948114654, - "sentence_nr": 4 + "score": 0.055358855900579915, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08441965713154952, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.4133673303529474, - "sentence_nr": 4 + "score": 0.3089195000557714, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10224003680109194, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.48231853956144055, - "sentence_nr": 4 + "score": 0.43617932503833745, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08610814090366911, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.37314692804855976, - "sentence_nr": 4 + "score": 0.2808850562492427, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3951799525041884, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4120675260154046, - "sentence_nr": 4 + "score": 0.5778309496373982, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3366701635309648, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.4669656953948632, - "sentence_nr": 4 + "score": 0.5410405158608997, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2101485462573989, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.43325089547434603, - "sentence_nr": 4 + "score": 0.432324099789087, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12174947256163497, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.4787742817228935, - "sentence_nr": 4 + "score": 0.46773004477276736, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3106867450120684, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.43325089547434603, - "sentence_nr": 4 + "score": 0.6219524559268158, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15986509194135143, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.4368967740154242, - "sentence_nr": 4 + "score": 0.5267000984208814, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21299929835435763, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.43325089547434603, - "sentence_nr": 4 + "score": 0.435252897241854, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40403057670242104, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.40580143204058805, - "sentence_nr": 4 + "score": 0.6369130541839939, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20581560303679203, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.4787742817228935, - "sentence_nr": 4 + "score": 0.4423856837970847, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.17694975149532557, - "sentence_nr": 4 + "score": 0.12351026307731532, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.4902785344040517, - "sentence_nr": 4 + "score": 0.412497189904211, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.23545536435121683, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.5196343731603573, - "sentence_nr": 4 + "score": 0.5385196954654492, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26585394939983104, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.4254686256509745, - "sentence_nr": 4 + "score": 0.4531567783270482, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3581233108133512, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.3088290057043984, - "sentence_nr": 4 + "score": 0.5681591513115792, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5411668461990086, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.28985008910948157, - "sentence_nr": 4 + "score": 0.6522264060979466, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2989125578949377, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.19951581244033986, - "sentence_nr": 4 + "score": 0.5919388623239312, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.21972813874997157, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.43044062502463715, - "sentence_nr": 4 + "score": 0.1456594557011165, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.4103552603347404, - "sentence_nr": 4 + "score": 0.37283362365421463, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.21236579931503258, - "sentence_nr": 4 + "score": 0.008318556718037537, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.1196655750514248, - "sentence_nr": 4 + "score": 0.26079296949054936, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.43807296710349614, - "sentence_nr": 4 + "score": 0.4683174146167095, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.313279920188343, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.1790439216234942, - "sentence_nr": 4 + "score": 0.4874026443611231, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.312235987246766, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.30577290788405437, - "sentence_nr": 4 + "score": 0.48588160953552434, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1079864637785236, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.20793313992045814, - "sentence_nr": 4 + "score": 0.22429322594709664, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09638209202576767, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.4271693186358773, - "sentence_nr": 4 + "score": 0.36140609257046646, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09704066931673304, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.44583799328544693, - "sentence_nr": 4 + "score": 0.21746317771363752, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1662362263248686, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.44562997179553193, - "sentence_nr": 4 + "score": 0.3954564642899918, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16004579189194332, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.394895635806623, - "sentence_nr": 4 + "score": 0.47245794353514825, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19803483846782313, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.24831335764102336, - "sentence_nr": 4 + "score": 0.48499595181063654, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4106421326776598, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4334317870334209, - "sentence_nr": 4 + "score": 0.7234753107719588, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3109995020889346, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4711738743510459, - "sentence_nr": 4 + "score": 0.6239402221019062, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29885803796555876, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.3394357133920106, - "sentence_nr": 4 + "score": 0.5453318063660885, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.308591734625294, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.41527307940938124, - "sentence_nr": 4 + "score": 0.47229626882753806, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2639855268912507, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.012870012870012871, - "sentence_nr": 4 + "score": 0.4723046890503893, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18245182973446727, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.3452137418984674, - "sentence_nr": 4 + "score": 0.43628086807089367, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.37030468338190614, - "sentence_nr": 4 + "score": 0.1799424456747602, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.7587397825317436, - "sentence_nr": 4 + "score": 0.483712340915935, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.29420957081163707, - "sentence_nr": 4 + "score": 0.22310118314393076, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.735955064899578, - "sentence_nr": 4 + "score": 0.48996223146164697, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.09108351723640423, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.7290399536251687, - "sentence_nr": 4 + "score": 0.3753289604442426, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.22894156860669912, - "sentence_nr": 4 + "score": 0.36733064093075246, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.6279102184928337, - "sentence_nr": 4 + "score": 0.5166610542835689, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.6400394598805441, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.7290399536251687, - "sentence_nr": 4 + "score": 0.7975136683846923, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.2636617253730958, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.658571547163188, - "sentence_nr": 4 + "score": 0.5820502899836056, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.09931887908771599, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.6568979068982934, - "sentence_nr": 4 + "score": 0.5290709591963555, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.22786788980326644, - "sentence_nr": 4 + "score": 0.1378743489117572, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.7242044123301367, - "sentence_nr": 4 + "score": 0.5086655694099045, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.29502343631964045, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.6568979068982934, - "sentence_nr": 4 + "score": 0.6084768644437066, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3203709331713924, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.6245952145297528, - "sentence_nr": 4 + "score": 0.6011010434667033, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.4047055149633949, - "sentence_nr": 4 + "score": 0.42100166812465206, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.1595594727127383, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.7290399536251687, - "sentence_nr": 4 + "score": 0.4576211976748643, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10078945601268775, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.40933226567881303, - "sentence_nr": 4 + "score": 0.261100760720076, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09253405254882056, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.5099813007320333, - "sentence_nr": 4 + "score": 0.32379787254738834, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.46778058365701697, - "sentence_nr": 4 + "score": 0.27741626606602926, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.4410456674024549, - "sentence_nr": 4 + "score": 0.3189379698441311, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11749577060207901, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.40005353469571986, - "sentence_nr": 4 + "score": 0.3982271272491047, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.031688534538597726, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.4670473691722499, - "sentence_nr": 4 + "score": 0.1890804036427279, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1273184616076436, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.5060729263955541, - "sentence_nr": 4 + "score": 0.3854117498605121, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19425852569761656, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.5187697231766483, - "sentence_nr": 4 + "score": 0.49223809908076244, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.10485465064884184, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.32492314763382363, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2491671393077868, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.43447037786636045, - "sentence_nr": 4 + "score": 0.4968863994886645, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11317455078536574, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.177662262614737, - "sentence_nr": 4 + "score": 0.45024294423829864, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19572923134753795, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.40933226567881303, - "sentence_nr": 4 + "score": 0.5295330264022122, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3284892459410516, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.1433583753123658, - "sentence_nr": 4 + "score": 0.5241940555676348, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4333728840229128, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.49048115595910957, - "sentence_nr": 4 + "score": 0.5891294993540688, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2630714263398001, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.36660871058936323, - "sentence_nr": 4 + "score": 0.5158255160824443, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05285535428575768, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.43058619444646323, - "sentence_nr": 4 + "score": 0.09737192246778478, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1807196053330159, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.2155389842479503, - "sentence_nr": 4 + "score": 0.3965678244961598, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1781762226766203, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.2604481569923449, - "sentence_nr": 4 + "score": 0.34132344846940776, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.03433235964400958, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06208764634995977, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.47698240079411425, - "sentence_nr": 4 + "score": 0.26302014232397014, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20778408193957135, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08712386724565187, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.45987051548888325, - "sentence_nr": 4 + "score": 0.4181195306395242, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20968620727095338, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.19164010393444778, - "sentence_nr": 4 + "score": 0.5464020265142666, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1954345051420573, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.2595747500221293, - "sentence_nr": 4 + "score": 0.5330042985514847, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.4698447771642698, - "sentence_nr": 4 + "score": 0.17197056422381857, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.2891112498777974, - "sentence_nr": 4 + "score": 0.18342876748823245, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.20506702518574138, - "sentence_nr": 4 + "score": 0.20686289430930357, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.22436115527072104, - "sentence_nr": 4 + "score": 0.023189147281557864, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1828605775949752, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.18954169863009754, - "sentence_nr": 4 + "score": 0.43762946682189374, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25874549081196363, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.16544469489755873, - "sentence_nr": 4 + "score": 0.45746164978998116, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2979184100665707, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.514916507474197, - "sentence_nr": 4 + "score": 0.6419705855699708, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2930190257785366, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.2624553065941702, - "sentence_nr": 4 + "score": 0.6713003163919651, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31164730348889336, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.18020185542463263, - "sentence_nr": 4 + "score": 0.6359774323093672, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18212463619188357, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.28803368842227195, - "sentence_nr": 4 + "score": 0.39972650121479075, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.14784675458566054, - "sentence_nr": 4 + "score": 0.4752769115696177, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17484203645083327, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.16593172081379223, - "sentence_nr": 4 + "score": 0.48749480755149777, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43338271162956393, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.15084092981447839, - "sentence_nr": 4 + "score": 0.6374354417480721, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3324045604922567, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.44419236241196947, - "sentence_nr": 4 + "score": 0.5837709820195482, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31609768845959424, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.43706010794795863, - "sentence_nr": 4 + "score": 0.5673141497742401, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3923185674218498, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.3859454347966736, - "sentence_nr": 4 + "score": 0.5839066770456797, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2932716844020195, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.43016467114650775, - "sentence_nr": 4 + "score": 0.6372147855611874, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3429545250720308, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.382488360211396, - "sentence_nr": 4 + "score": 0.6338733776441033, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2256335120807399, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.3994368992494938, - "sentence_nr": 4 + "score": 0.41142902331306563, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27518912165164433, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.3806471506505897, - "sentence_nr": 4 + "score": 0.5271056340471697, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.17035865562294203, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4491431969626715, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10925485406610212, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.4322358438398362, - "sentence_nr": 4 + "score": 0.40585265442330004, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13953680853761288, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.39126796773446315, - "sentence_nr": 4 + "score": 0.36641285502598664, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11929906740245497, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.4067122353485367, - "sentence_nr": 4 + "score": 0.31766803297095625, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.034925844078130285, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.40969772112178865, - "sentence_nr": 4 + "score": 0.12176905733039614, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.36300532343703745, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.4134635647455475, - "sentence_nr": 4 + "score": 0.5885664760313373, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21789876624905538, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.3934230844821369, - "sentence_nr": 4 + "score": 0.3682819491047234, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09257776335697764, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.40972213900070625, - "sentence_nr": 4 + "score": 0.4234307006910246, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18347190231449206, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.22713927769854092, - "sentence_nr": 4 + "score": 0.4239252832108403, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19461047263115017, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.4122705622809137, - "sentence_nr": 4 + "score": 0.45988413428389885, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13139217875016063, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.4297822673018643, - "sentence_nr": 4 + "score": 0.2663715525584477, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07746741482791356, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.45934745896511686, - "sentence_nr": 4 + "score": 0.33235742788356043, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21905325152754598, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3163506555011039, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.41874354622000565, - "sentence_nr": 4 + "score": 0.5609222668505561, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21270966597282465, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.21998251147567982, - "sentence_nr": 4 + "score": 0.5460005359719966, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1769715756286121, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.45798388821076286, - "sentence_nr": 4 + "score": 0.4730914561312012, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07799070710306764, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.3289300072190674, - "sentence_nr": 4 + "score": 0.33308031587923975, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1409579985182915, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.32413768919026276, - "sentence_nr": 4 + "score": 0.48389215304595906, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11862036075744598, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.34057065677205645, - "sentence_nr": 4 + "score": 0.444201724436452, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.34057065677205645, - "sentence_nr": 4 + "score": 0.3133814893022024, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07833962045925216, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.3345333252096335, - "sentence_nr": 4 + "score": 0.4012947847397506, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.32001325532974667, - "sentence_nr": 4 + "score": 0.27588069957732886, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.3138244971309572, - "sentence_nr": 4 + "score": 0.01170165350096746, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10692719022071172, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.3155522046261439, - "sentence_nr": 4 + "score": 0.20917377194877063, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.306554798549301, - "sentence_nr": 4 + "score": 0.10184861034465903, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.41377038486289314, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.32265595231344285, - "sentence_nr": 4 + "score": 0.6103253550829261, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3380920573004095, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.30978899606166077, - "sentence_nr": 4 + "score": 0.5109739521707614, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3234649237345335, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.32476827258498703, - "sentence_nr": 4 + "score": 0.4780933016980809, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.21142141714303078, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.43493490557877573, - "sentence_nr": 4 + "score": 0.02205958411600357, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.28991415068332943, - "sentence_nr": 4 + "score": 0.34582774491793894, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.2535985303909064, - "sentence_nr": 4 + "score": 0.2893596714040359, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22816849039973935, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.2906936157823074, - "sentence_nr": 4 + "score": 0.5295534280606148, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.348007986647201, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.30032663294181017, - "sentence_nr": 4 + "score": 0.6148736550683231, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31222258402876674, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.4169329809261592, - "sentence_nr": 4 + "score": 0.5549937870516303, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.24141428403439927, - "sentence_nr": 4 + "score": 0.10721126066665879, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23683075175361493, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.4243475188011289, - "sentence_nr": 4 + "score": 0.2631328190836655, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.23943445872806784, - "sentence_nr": 4 + "score": 0.16455392433653304, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.2044800736021839, - "sentence_nr": 4 + "score": 0.36033217429111203, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.42460680673338275, - "sentence_nr": 4 + "score": 0.5707860320039717, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.461597801606675, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.27812527095899386, - "sentence_nr": 4 + "score": 0.6280777654467244, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4224298950114519, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.2610708875934103, - "sentence_nr": 4 + "score": 0.60823085524287, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.41597114236951854, - "sentence_nr": 4 + "score": 0.4141871474340027, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.4350989271447826, - "sentence_nr": 4 + "score": 0.36769040719718776, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.4297476286175239, - "sentence_nr": 4 + "score": 0.4064141882459388, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3096036988813059, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.42459015345630374, - "sentence_nr": 4 + "score": 0.5894510883198948, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3286711939680359, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.4173469189995656, - "sentence_nr": 4 + "score": 0.5944310794747374, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13547277341758465, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.4322857520745532, - "sentence_nr": 4 + "score": 0.4830189619506113, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23841754841770157, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.4322857520745532, - "sentence_nr": 4 + "score": 0.34481325534410395, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2822535302220024, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.4322857520745532, - "sentence_nr": 4 + "score": 0.3874773378787974, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31747697264511426, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.41231967817566284, - "sentence_nr": 4 + "score": 0.40797778663955364, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43368945552925614, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.41850408232257996, - "sentence_nr": 4 + "score": 0.6793717376740783, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.46935933364934335, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.39529824050490364, - "sentence_nr": 4 + "score": 0.773055573548356, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43368945552925614, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.4322857520745532, - "sentence_nr": 4 + "score": 0.640995178057518, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20050320605789015, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.6390929517394389, - "sentence_nr": 4 + "score": 0.4046291070099031, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3160213610127146, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.4714767063337979, - "sentence_nr": 4 + "score": 0.5165614670038283, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.30758744700466467, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.5124045843781208, - "sentence_nr": 4 + "score": 0.4684197705189288, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4161791450287817, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5823727987141498, - "sentence_nr": 4 + "score": 0.7054426787013603, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2961516536011624, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.40525561144552713, - "sentence_nr": 4 + "score": 0.49803924348035766, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3459667618766101, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5052295296496148, - "sentence_nr": 4 + "score": 0.6170810606402402, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.65462584415807, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.46832763312452297, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.6418812400466414, - "sentence_nr": 4 + "score": 0.8176110134774669, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.9436043261706615, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.46854856592314836, - "sentence_nr": 4 + "score": 0.9880191679951993, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2830789070123405, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5129829767946291, - "sentence_nr": 4 + "score": 0.41858897147271634, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4593546097889176, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.4317996505616924, - "sentence_nr": 4 + "score": 0.608602146246901, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23386786214190372, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5991944525613995, - "sentence_nr": 4 + "score": 0.46662929903381617, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.42067720018268145, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.4506325776546161, - "sentence_nr": 4 + "score": 0.6309653612961436, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3397180516736864, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.4754711104013267, - "sentence_nr": 4 + "score": 0.6494995648532881, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23811989337799513, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.4775280122429458, - "sentence_nr": 4 + "score": 0.4812080785035883, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22166358657237664, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.4930829209420314, - "sentence_nr": 4 + "score": 0.45834104234305023, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.255918614113723, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.4070672647268937, - "sentence_nr": 4 + "score": 0.44318862516624546, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35285733014385007, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.6406089576789361, - "sentence_nr": 4 + "score": 0.5945202859296662, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2460137257692754, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.65462584415807, - "sentence_nr": 4 + "score": 0.5397894338370378, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2809009542151822, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.6406089576789361, - "sentence_nr": 4 + "score": 0.5819888906713027, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4573889291137309, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.41229295635175445, - "sentence_nr": 4 + "score": 0.6974989991762017, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.4725994974466954, - "sentence_nr": 4 + "score": 0.27185330211646, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.3527713788852087, - "sentence_nr": 4 + "score": 0.3487575221722675, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15824382329465247, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.4506325776546161, - "sentence_nr": 4 + "score": 0.2847034639706718, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1954328575417037, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.18900427606312895, - "sentence_nr": 4 + "score": 0.30293053536483644, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27914759735007616, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.2379375267482382, - "sentence_nr": 4 + "score": 0.3778334876813023, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33758329691923056, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.1935812904783315, - "sentence_nr": 4 + "score": 0.4156592381734106, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16177172185899982, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.15694956267772112, - "sentence_nr": 4 + "score": 0.35656294562478846, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1196671345116598, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.1519320263160399, - "sentence_nr": 4 + "score": 0.4250373762074513, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.1589166045294459, - "sentence_nr": 4 + "score": 0.2805776238834007, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2149405271477231, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.39028093982330686, - "sentence_nr": 4 + "score": 0.40802426587644575, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2551158426117226, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.24534473408944998, - "sentence_nr": 4 + "score": 0.4338792723307896, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.15426765696281117, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.36404432411255067, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35138749399652214, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.24125600379049897, - "sentence_nr": 4 + "score": 0.5764814746526004, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.28859523083492383, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.17016732449779756, - "sentence_nr": 4 + "score": 0.5243060035864178, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4258744680390019, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.16493998136610505, - "sentence_nr": 4 + "score": 0.678077932919771, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20690996611611379, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.5406340703314851, - "sentence_nr": 4 + "score": 0.5406044734296481, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.2102369368326755, - "sentence_nr": 4 + "score": 0.23626187156344583, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.5597860306970691, - "sentence_nr": 4 + "score": 0.5259172349664247, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14065108796517406, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.2853612459193062, - "sentence_nr": 4 + "score": 0.4653016103437884, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.32876879948340443, - "sentence_nr": 4 + "score": 0.10556737722881891, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.25426284054822956, - "sentence_nr": 4 + "score": 0.3637736731596556, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.17586616574792086, - "sentence_nr": 4 + "score": 0.2840158072494531, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.14669785543513997, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.6177791303755155, - "sentence_nr": 4 + "score": 0.3937770501596208, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31123452089299364, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.2777563958324541, - "sentence_nr": 4 + "score": 0.4756674690337691, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.18506115334020554, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5096863469728294, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.5168203343451147, - "sentence_nr": 4 + "score": 0.11596632790546842, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16510653858282376, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.1803079973226055, - "sentence_nr": 4 + "score": 0.14514613027046297, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.30824944361075485, - "sentence_nr": 4 + "score": 0.1479818938322678, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.18842393723950338, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.5854975500881314, - "sentence_nr": 4 + "score": 0.2188350647878129, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.15774545980684188, - "sentence_nr": 4 + "score": 0.0744575238654659, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.616730419953906, - "sentence_nr": 4 + "score": 0.18081240117184907, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2406197931700809, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.44478883235565975, - "sentence_nr": 4 + "score": 0.3480566207137981, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18762960550870147, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.5211859078984742, - "sentence_nr": 4 + "score": 0.5048762254598883, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19089395298343978, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.6038769794796149, - "sentence_nr": 4 + "score": 0.4775461600364851, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.16467029855845897, - "sentence_nr": 4 + "score": 0.18762960550870147, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.521517293959126, - "sentence_nr": 4 + "score": 0.505905280342391, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.35540164109034167, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.6568979068982934, - "sentence_nr": 4 + "score": 0.5028380576753965, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31087084319061986, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.469904565627719, - "sentence_nr": 4 + "score": 0.48493403576578487, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.2843877927900334, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5287507679912548, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.21651956746181053, - "sentence_nr": 4 + "score": 0.15300252291898875, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.6330972445771483, - "sentence_nr": 4 + "score": 0.5221038026847953, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3724134618099435, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.37066963888928744, - "sentence_nr": 4 + "score": 0.5411724478620953, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10991514729498916, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.5290142170815124, - "sentence_nr": 4 + "score": 0.35889788560857044, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.1400373960430748, - "sentence_nr": 4 + "score": 0.32692597326475636, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.37309390213978083, - "sentence_nr": 4 + "score": 0.30229895292756775, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.3718896131479321, - "sentence_nr": 4 + "score": 0.32013836373742083, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1784284488020755, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.3101994394372559, - "sentence_nr": 4 + "score": 0.28915891965402907, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.28819072249233424, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.36006710624510274, - "sentence_nr": 4 + "score": 0.4281025086478473, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1722187017293513, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.32694307847435544, - "sentence_nr": 4 + "score": 0.4101544500733319, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09500509002957103, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.3601131188427998, - "sentence_nr": 4 + "score": 0.3165301597200995, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31643186523025857, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.3225168425405649, - "sentence_nr": 4 + "score": 0.4990369740896189, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0972760643475914, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.34268917796251985, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11183447695967942, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.31177283358766017, - "sentence_nr": 4 + "score": 0.2981154499191547, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29160060199573634, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.2350555071806251, - "sentence_nr": 4 + "score": 0.41859679392459287, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16457404825501376, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.34099102423958866, - "sentence_nr": 4 + "score": 0.33333438552725225, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.17534823156623092, - "sentence_nr": 4 + "score": 0.279651701540242, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.4853402799234523, - "sentence_nr": 4 + "score": 0.41325325835097537, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20614290043638034, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.32889384774917263, - "sentence_nr": 4 + "score": 0.4026346891273123, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10458125123016462, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.2497159712696397, - "sentence_nr": 4 + "score": 0.36412391917812675, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39284822049881124, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.17444346314545967, - "sentence_nr": 4 + "score": 0.5731994852253092, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4419177841121404, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.18266088622993074, - "sentence_nr": 4 + "score": 0.6137680308213187, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25567957494892185, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.21918702676823268, - "sentence_nr": 4 + "score": 0.3876058827430677, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.22637359354764466, - "sentence_nr": 4 + "score": 0.32284785906314306, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.6010070471156334, - "sentence_nr": 4 + "score": 0.5391266648881738, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3914713795199333, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.4715150407255315, - "sentence_nr": 4 + "score": 0.5396649879183305, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3049871822195904, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.1647463149973908, - "sentence_nr": 4 + "score": 0.5208588286358844, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33557244321033963, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.4104909902372063, - "sentence_nr": 4 + "score": 0.5209156086024106, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2896941452360586, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.20549680977299417, - "sentence_nr": 4 + "score": 0.41709543651974984, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.18262078594284786, - "sentence_nr": 4 + "score": 0.20254179344857384, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29007724698820536, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.5335315304967084, - "sentence_nr": 4 + "score": 0.574462622391897, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3125048656450529, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.42970160394394363, - "sentence_nr": 4 + "score": 0.5603859508222943, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2267246719280858, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.4370528005274534, - "sentence_nr": 4 + "score": 0.48581943550245826, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.3649154493210098, - "sentence_nr": 4 + "score": 0.3674833290337466, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1929103437854986, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.3512855039522258, - "sentence_nr": 4 + "score": 0.4334775243896179, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09709355995288857, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.3871560819733963, - "sentence_nr": 4 + "score": 0.3190240812909853, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.42850086056861364, - "sentence_nr": 4 + "score": 0.21418093872225227, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2171788734284664, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.6423124418413864, - "sentence_nr": 4 + "score": 0.3041500319808576, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06534434987768793, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.34887639426636174, - "sentence_nr": 4 + "score": 0.19191970942691858, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14710052131359536, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.5181731608711286, - "sentence_nr": 4 + "score": 0.39474728110016194, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.3328231949727341, - "sentence_nr": 4 + "score": 0.32758099155045156, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.40882161860042143, - "sentence_nr": 4 + "score": 0.2954141956759853, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11781409318573131, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.12716724199879337, - "sentence_nr": 4 + "score": 0.3064743661860543, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13399284752024776, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.5690206807396397, - "sentence_nr": 4 + "score": 0.3533000786320299, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1334209353325791, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.5038920264146319, - "sentence_nr": 4 + "score": 0.33435636843826655, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.5395882240381418, - "sentence_nr": 4 + "score": 0.34319152953492454, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16755318066668218, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.42988803625606836, - "sentence_nr": 4 + "score": 0.5076991427590911, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15483009791264127, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.5014180485937552, - "sentence_nr": 4 + "score": 0.421644120450493, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.5350011214551942, - "sentence_nr": 4 + "score": 0.16135142241763387, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1143433820088083, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.570777031854836, - "sentence_nr": 4 + "score": 0.44706528758842706, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1504843536148922, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.49292600165461214, - "sentence_nr": 4 + "score": 0.4754016977092461, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2103182834908572, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.5046162213845391, - "sentence_nr": 4 + "score": 0.3252172327778091, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2103182834908572, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.4210157243284825, - "sentence_nr": 4 + "score": 0.31189147517716775, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2034131578022728, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.49016646730730157, - "sentence_nr": 4 + "score": 0.3250035615909008, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10758299612696903, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.39818525322365445, - "sentence_nr": 4 + "score": 0.3485199255336966, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0936186743453751, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.554411787673542, - "sentence_nr": 4 + "score": 0.34343520280095413, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11007482046907661, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.4146222850152668, - "sentence_nr": 4 + "score": 0.37477767256456523, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10727295782787309, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.44045974327436815, - "sentence_nr": 4 + "score": 0.40945621258421, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10601317434781207, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.39680097980725987, - "sentence_nr": 4 + "score": 0.3549608383744777, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.43833368912949555, - "sentence_nr": 4 + "score": 0.2842642186201746, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1531024544118244, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.4990914601523915, - "sentence_nr": 4 + "score": 0.36865141579883465, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18672196092380966, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.4990914601523915, - "sentence_nr": 4 + "score": 0.5755041733882698, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16591958445638164, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.4143896526938585, - "sentence_nr": 4 + "score": 0.3816020524781071, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5521515399820418, - "sentence_nr": 4 + "score": 0.09131381866486066, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.41400989596585114, - "sentence_nr": 4 + "score": 0.19526867634454675, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.5088672498824739, - "sentence_nr": 4 + "score": 0.06313050320435959, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1334209353325791, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.4194210013201768, - "sentence_nr": 4 + "score": 0.32197244359239063, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0953796259494125, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.5277042186210706, - "sentence_nr": 4 + "score": 0.335376322187008, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1136025775201101, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.3827580433841417, - "sentence_nr": 4 + "score": 0.32399192578787606, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14319746539576667, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.46171989402470454, - "sentence_nr": 4 + "score": 0.38319285812856063, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14743233570338127, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4051297881219653, - "sentence_nr": 4 + "score": 0.363972079338474, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1758556937351426, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.3008659218202134, - "sentence_nr": 4 + "score": 0.4271110965918086, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15112446346253322, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.38636039023995106, - "sentence_nr": 4 + "score": 0.4582754568242892, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15340824575172546, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.36766978144243656, - "sentence_nr": 4 + "score": 0.4948861786840383, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.1591469869206786, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.44132589683753737, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12288887055424895, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.6002163368247662, - "sentence_nr": 4 + "score": 0.43277999825597285, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13004139997277986, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.2536205089423194, - "sentence_nr": 4 + "score": 0.4607205290066856, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.3482169841214968, - "sentence_nr": 4 + "score": 0.39609017426771376, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12131345516615005, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.15388831190795366, - "sentence_nr": 4 + "score": 0.36539799614653956, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3231843559409337, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.24316286154385877, - "sentence_nr": 4 + "score": 0.5434967300739557, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3671001023271087, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.28130650893311265, - "sentence_nr": 4 + "score": 0.5548530261238012, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.1955267872750564, - "sentence_nr": 4 + "score": 0.41031782909564674, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.3128973144973244, - "sentence_nr": 4 + "score": 0.4055071421603482, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.2317282154699006, - "sentence_nr": 4 + "score": 0.37299539088865524, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17950199148531412, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.41980622655065647, - "sentence_nr": 4 + "score": 0.4270012392614725, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1664106020945361, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.31748822774623225, - "sentence_nr": 4 + "score": 0.4280857917577298, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.10157474763584896, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.350309379624823, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12626953198628743, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.16887665878255845, - "sentence_nr": 4 + "score": 0.28923398754126967, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14344436925456175, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.15774554820899148, - "sentence_nr": 4 + "score": 0.31855029095203735, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.26885090623067887, - "sentence_nr": 4 + "score": 0.1450561148847636, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10052035011320741, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.33867507760059357, - "sentence_nr": 4 + "score": 0.31620064580236706, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10628732960069, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.2996031251762614, - "sentence_nr": 4 + "score": 0.3283974297647533, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07866142497287454, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.2065195596274271, - "sentence_nr": 4 + "score": 0.22436847715795769, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11850541740049741, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2091048289203972, - "sentence_nr": 4 + "score": 0.3765127698761297, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1535686541317235, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2137123752967766, - "sentence_nr": 4 + "score": 0.43682738448704195, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1289623877962874, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "so", - "task": "translation", + "task": "translation_to", "metric": "chrf", - "score": 0.2146110419816635, - "sentence_nr": 4 + "score": 0.29257396221266196, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19422659659588543, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.36500123755057784, - "sentence_nr": 4 + "score": 0.4406265906948898, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18245501785036272, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.347144663116465, - "sentence_nr": 4 + "score": 0.41711042074816285, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.22481074167380632, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4682073476065324, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33626043540904865, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.31527346354330105, - "sentence_nr": 4 + "score": 0.47970564640010444, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3087246340317326, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.15897677846800023, - "sentence_nr": 4 + "score": 0.5190191533554683, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4909991753827356, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.24497983229225412, - "sentence_nr": 4 + "score": 0.6181007830367896, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07730563279435597, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.13995623895459872, - "sentence_nr": 4 + "score": 0.3100987773946611, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.149911771227821, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.4674855890757815, - "sentence_nr": 4 + "score": 0.46600977419086265, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.47300840366486596, - "sentence_nr": 4 + "score": 0.2344392143175243, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18055689930962057, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.41241863293127407, - "sentence_nr": 4 + "score": 0.36732605723857714, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.47300840366486596, - "sentence_nr": 4 + "score": 0.20723638316347853, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.3532706077955847, - "sentence_nr": 4 + "score": 0.1992845824168194, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1828540486873082, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.39742717581317527, - "sentence_nr": 4 + "score": 0.42270228247804714, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21534597645653109, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.41241863293127407, - "sentence_nr": 4 + "score": 0.4429153970503551, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.004468275245755138, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.48109077206853695, - "sentence_nr": 4 + "score": 0.13136586691610921, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.44027072457059613, - "sentence_nr": 4 + "score": 0.16742936811875977, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.3893385906010263, - "sentence_nr": 4 + "score": 0.11940504610649601, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1504843536148922, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.6402657401259225, - "sentence_nr": 4 + "score": 0.37031670283607354, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13597796343834895, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.6478447458847402, - "sentence_nr": 4 + "score": 0.30423050739361934, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0993195473228234, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.5695248405921098, - "sentence_nr": 4 + "score": 0.2774071822336668, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14828912728661656, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.6375973667745106, - "sentence_nr": 4 + "score": 0.37328040148644737, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2974539240705033, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.6024181637771958, - "sentence_nr": 4 + "score": 0.4914860034714028, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.6402657401259225, - "sentence_nr": 4 + "score": 0.2966236662378785, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.6426687509864203, - "sentence_nr": 4 + "score": 0.3980586645261528, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2817858902060068, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.6390929517394389, - "sentence_nr": 4 + "score": 0.5812000010572426, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.319143328029808, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.6677526933120659, - "sentence_nr": 4 + "score": 0.5439628345852634, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12584834793909203, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.6810253452803035, - "sentence_nr": 4 + "score": 0.4359538673356259, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23944732810452216, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.522423303475414, - "sentence_nr": 4 + "score": 0.5585059996994121, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.22786788980326644, - "sentence_nr": 4 + "score": 0.20365642689242272, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.7387739811627232, - "sentence_nr": 4 + "score": 0.46771026250334335, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14258147236508803, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.1329604040267493, - "sentence_nr": 4 + "score": 0.38105995944362, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.4471856677359072, - "sentence_nr": 4 + "score": 0.4149221101337332, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.452279977058944, - "sentence_nr": 4 + "score": 0.45080523333135825, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2497481028437212, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 + "score": 0.3974751279392944, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2904391540454532, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.5521590062829653, - "sentence_nr": 4 + "score": 0.49418835371490716, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3349675354847837, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.19293080730252732, - "sentence_nr": 4 + "score": 0.5234207823607111, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12371456571439915, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 + "score": 0.35562282938981676, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1274115988406287, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.4052165625503468, - "sentence_nr": 4 + "score": 0.3310991575890784, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.10982839842078235, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25807556121990866, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.46326814099366476, - "sentence_nr": 4 + "score": 0.23391395118711963, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2918271110182603, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.17568454593879543, - "sentence_nr": 4 + "score": 0.5908408959428468, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.3791082020994395, - "sentence_nr": 4 + "score": 0.2999540940698276, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2115594277679545, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.49816993286090683, - "sentence_nr": 4 + "score": 0.48937379669245845, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08545148602137898, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.44863216660495664, - "sentence_nr": 4 + "score": 0.36368508833748336, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05837196410192872, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.3302903244452369, - "sentence_nr": 4 + "score": 0.20919043801522816, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0763518370649764, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.3313166446105706, - "sentence_nr": 4 + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation_to", + "metric": "chrf", + "score": 0.22203628687051016, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06666337489197861, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.2721408830033411, - "sentence_nr": 4 + "score": 0.2641431149490312, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.2035563167651274, - "sentence_nr": 4 + "score": 0.23218076255522052, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.44863216660495664, - "sentence_nr": 4 + "score": 0.3454863565779109, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1637083741560297, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.3445703908458862, - "sentence_nr": 4 + "score": 0.39068717380984525, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29620044168375165, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.3196493787756392, - "sentence_nr": 4 + "score": 0.38507704427342426, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10373198738191458, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.2298321888233454, - "sentence_nr": 4 + "score": 0.35910226518392213, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13539161487207824, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.3098123540103845, - "sentence_nr": 4 + "score": 0.4090157301700233, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11401806409768651, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.1342477978716863, - "sentence_nr": 4 + "score": 0.3199134488826683, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15154847535138072, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.42511022061090775, - "sentence_nr": 4 + "score": 0.297329314233632, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11546530656868825, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.4279977800006272, - "sentence_nr": 4 + "score": 0.2652340411459836, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.3865378580315333, - "sentence_nr": 4 + "score": 0.01803434118675279, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15161074985415177, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.385169510617517, - "sentence_nr": 4 + "score": 0.26121051953869123, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12365973156022511, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.3579949693968627, - "sentence_nr": 4 + "score": 0.19243566997831077, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.7825422900366437, - "sentence_nr": 4 + "score": 0.07634618271335385, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.8503171627677965, - "sentence_nr": 4 + "score": 0.23992292805624504, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0977380780819774, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.502684666455707, - "sentence_nr": 4 + "score": 0.31987184819447434, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07526631223214, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.07149669287185864, - "sentence_nr": 4 + "score": 0.32200266452557863, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.4657143140310299, - "sentence_nr": 4 + "score": 0.2541074256214778, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.39265131162383077, - "sentence_nr": 4 + "score": 0.24384315520084382, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.40281955727233343, - "sentence_nr": 4 + "score": 0.24323684546156932, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43103580001357805, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.17013461044703918, - "sentence_nr": 4 + "score": 0.6690742226623104, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6031612036218008, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.19400141696774292, - "sentence_nr": 4 + "score": 0.736286703381354, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39432344823662835, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "bcp_47": "en", + "task": "translation_to", "metric": "chrf", - "score": 0.1721293079939147, - "sentence_nr": 4 + "score": 0.5943452555220106, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10772332006118607, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.12863206872658067, - "sentence_nr": 4 + "score": 0.17652714369664665, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11254397891886614, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.1377576543423856, - "sentence_nr": 4 + "score": 0.20623288988983426, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1100081929352474, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", "metric": "chrf", - "score": 0.19591640810784544, - "sentence_nr": 4 + "score": 0.18967061672400035, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27190910124573536, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.1550261157185766, - "sentence_nr": 4 + "score": 0.5173567851798608, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24493390281390082, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.1609718736281563, - "sentence_nr": 4 + "score": 0.48113625107113883, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19476681308252697, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", "metric": "chrf", - "score": 0.17601889426326467, - "sentence_nr": 4 + "score": 0.42030407727741037, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.429512074830509, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.15614020477710228, - "sentence_nr": 4 + "score": 0.6066779955199886, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3459789902390003, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.16685494585396754, - "sentence_nr": 4 + "score": 0.5620330456296532, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.37825713491091884, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", "metric": "chrf", - "score": 0.13064508924150248, - "sentence_nr": 4 + "score": 0.5584414289480568, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09596136927307748, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.4411812923020589, - "sentence_nr": 4 + "score": 0.40849147213099996, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1845747513433909, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.4179911123724079, - "sentence_nr": 4 + "score": 0.44379971518505973, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18212463619188357, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "bcp_47": "ar", + "task": "translation_to", "metric": "chrf", - "score": 0.3630314170535937, - "sentence_nr": 4 + "score": 0.469592540371137, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22972631482860506, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.39881946127463486, - "sentence_nr": 4 + "score": 0.436102988762466, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2517176762753373, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.22173190464216275, - "sentence_nr": 4 + "score": 0.45137344500317134, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3128384316903283, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", "metric": "chrf", - "score": 0.16727155744441868, - "sentence_nr": 4 + "score": 0.48016279207050283, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.45026965676007474, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.5352038615270772, - "sentence_nr": 4 + "score": 0.6668256174353906, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.43200638115383627, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.42874234643884424, - "sentence_nr": 4 + "score": 0.6892273787708799, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.341195158470265, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6539473951166187, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.49449516991634934, - "sentence_nr": 4 + "score": 0.4710260495003035, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.18861932879019175, - "sentence_nr": 4 + "score": 0.40475700826319555, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", "metric": "chrf", - "score": 0.1979106679566376, - "sentence_nr": 4 + "score": 0.4349871720911447, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.22637359354764466, - "sentence_nr": 4 + "score": 0.310186302993101, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.6010070471156334, - "sentence_nr": 4 + "score": 0.5434540129901786, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5244380103905697, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5880855470290005, - "sentence_nr": 4 + "score": 0.6914581279144536, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4472834999328078, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "bcp_47": "pt", + "task": "translation_to", "metric": "chrf", - "score": 0.5535439540882026, - "sentence_nr": 4 + "score": 0.6457130269652316, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17207258849758605, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.37334388072922814, - "sentence_nr": 4 + "score": 0.3052503498954155, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.504154287515855, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.49556192935701593, - "sentence_nr": 4 + "score": 0.6074467585243234, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18771816026273827, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", "metric": "chrf", - "score": 0.4912553801314764, - "sentence_nr": 4 + "score": 0.37594160796244835, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.377949467106015, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.65462584415807, - "sentence_nr": 4 + "score": 0.6288808546806746, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33684416564135483, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.5679837784050215, - "sentence_nr": 4 + "score": 0.6173496967095872, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.21054588509072256, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5020237474009813, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.28341626687166926, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5873831965245108, - "sentence_nr": 4 + "score": 0.5869314876429665, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11283678603002038, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.2558970368401232, - "sentence_nr": 4 + "score": 0.5027560731174364, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29432909534200313, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", "metric": "chrf", - "score": 0.5469398226382491, - "sentence_nr": 4 + "score": 0.5637465580755235, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.22894156860669912, - "sentence_nr": 4 + "score": 0.23399485663908418, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.6299487983245466, - "sentence_nr": 4 + "score": 0.5631067041333725, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.15362208233245514, - "sentence_nr": 4 + "score": 0.30894994002746395, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.6295157857600502, - "sentence_nr": 4 + "score": 0.6495798576994254, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "bleu", - "score": 0.18842393723950338, - "sentence_nr": 4 + "score": 0.35806497640912766, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "bcp_47": "id", + "task": "translation_to", "metric": "chrf", - "score": 0.5854975500881314, - "sentence_nr": 4 + "score": 0.5522671396375264, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.18842393723950338, - "sentence_nr": 4 + "score": 0.1793410088328766, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.5899792544547467, - "sentence_nr": 4 + "score": 0.5126133936832279, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.22894156860669912, - "sentence_nr": 4 + "score": 0.25325897139464854, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.6299487983245466, - "sentence_nr": 4 + "score": 0.511461689033225, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.3730973285213212, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", "metric": "chrf", - "score": 0.6568979068982934, - "sentence_nr": 4 + "score": 0.5926422939507472, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.5303624596095554, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.7835371347721495, - "sentence_nr": 4 + "score": 0.24942094354139677, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.6265140753983048, - "sentence_nr": 4 + "score": 0.9199349282509897, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 1.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.9199349282509897, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.19355919558818033, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.6568979068982934, - "sentence_nr": 4 + "score": 0.3931759629568022, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.162496560019558, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.46014996368181593, - "sentence_nr": 4 + "score": 0.47693678197649336, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "bleu", - "score": 0.22894156860669912, - "sentence_nr": 4 + "score": 0.21126480857843466, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", "metric": "chrf", - "score": 0.6299487983245466, - "sentence_nr": 4 + "score": 0.4132516611502927, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.16743424359150172, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.43311618815403874, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2044743996680912, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.6200828204097578, - "sentence_nr": 4 + "score": 0.5296253104099969, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.09990095999535835, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "bcp_47": "mr", + "task": "translation_to", "metric": "chrf", - "score": 0.658571547163188, - "sentence_nr": 4 + "score": 0.43541588074965143, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.2089685256289425, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.658571547163188, - "sentence_nr": 4 + "score": 0.5333424925429209, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.22894156860669912, - "sentence_nr": 4 + "score": 0.1689554748507331, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.6329467036048876, - "sentence_nr": 4 + "score": 0.48752020586891187, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.09863022371664866, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", "metric": "chrf", - "score": 0.658571547163188, - "sentence_nr": 4 + "score": 0.4410680148316049, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.1296922311601412, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.44753695206116967, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16157821959747307, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.6417603075499863, - "sentence_nr": 4 + "score": 0.5255622543684244, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.15748430756119847, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", "metric": "chrf", - "score": 0.658571547163188, - "sentence_nr": 4 + "score": 0.530020236187551, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.15415302247076879, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.658571547163188, - "sentence_nr": 4 + "score": 0.4123343961300446, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.2737928561916526, - "sentence_nr": 4 + "score": 0.14473479197868241, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.5581978650607443, - "sentence_nr": 4 + "score": 0.4413634590503217, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.1842259592735289, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.4512010919409926, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18087140599571747, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5800922255460801, - "sentence_nr": 4 + "score": 0.4377272235852682, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5766882097318834, - "sentence_nr": 4 + "score": 0.4566114002517467, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21245026220526622, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "bcp_47": "fa", + "task": "translation_to", "metric": "chrf", - "score": 0.5800922255460801, - "sentence_nr": 4 + "score": 0.5450614695192502, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.5807783428367905, - "sentence_nr": 4 + "score": 0.5469867443567507, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.558235742045378, - "sentence_nr": 4 + "score": 0.5049599536473849, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", "metric": "chrf", - "score": 0.5731807188469008, - "sentence_nr": 4 + "score": 0.464305874546181, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18034760660633942, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.6277082350099422, - "sentence_nr": 4 + "score": 0.20587735759782932, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24468026894076475, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.5744784106089311, - "sentence_nr": 4 + "score": 0.2749771763892432, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.24399387658656807, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3044305895074006, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.599418106384017, - "sentence_nr": 4 + "score": 0.3147902135663803, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.4285022577748209, - "sentence_nr": 4 + "score": 0.24977763651196191, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", "metric": "chrf", - "score": 0.45932416060974035, - "sentence_nr": 4 + "score": 0.21237639792675794, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2693653652960018, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.4666156174173635, - "sentence_nr": 4 + "score": 0.5351323891749961, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.28494569863631247, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.3893867836646916, - "sentence_nr": 4 + "score": 0.5314415165747192, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23269544971552114, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "bcp_47": "it", + "task": "translation_to", "metric": "chrf", - "score": 0.38246468665452293, - "sentence_nr": 4 + "score": 0.49749977919083926, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2503202840132539, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.388047366459405, - "sentence_nr": 4 + "score": 0.6628587960881935, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3780253819893537, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.4666156174173635, - "sentence_nr": 4 + "score": 0.6691463628105327, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2764824153808333, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", "metric": "chrf", - "score": 0.441761958013597, - "sentence_nr": 4 + "score": 0.669057962133461, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15197630365858814, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.43608445006847185, - "sentence_nr": 4 + "score": 0.3903562094322204, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.43608445006847185, - "sentence_nr": 4 + "score": 0.2991476612258654, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", "metric": "chrf", - "score": 0.4282343341370423, - "sentence_nr": 4 + "score": 0.3082702687589522, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17677215260187162, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.5238865952545348, - "sentence_nr": 4 + "score": 0.42492794367017145, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.125959010609916, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.27828716886545535, - "sentence_nr": 4 + "score": 0.4578429795465262, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", "metric": "chrf", - "score": 0.43608445006847185, - "sentence_nr": 4 + "score": 0.3002210625771089, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.22312700803550112, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.21732734812103588, - "sentence_nr": 4 + "score": 0.41892404547996925, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2686036309072948, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.2673895048733062, - "sentence_nr": 4 + "score": 0.46796278650100787, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.32114886466116627, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "bcp_47": "th", + "task": "translation_to", "metric": "chrf", - "score": 0.28158744196562724, - "sentence_nr": 4 + "score": 0.4290130719589314, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.38791398909746805, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.16486756282784554, - "sentence_nr": 4 + "score": 0.5307069804730096, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3021887988636079, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.20255581298259964, - "sentence_nr": 4 + "score": 0.45419878739503283, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23552484042758592, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", "metric": "chrf", - "score": 0.1686130658229696, - "sentence_nr": 4 + "score": 0.4709821983218137, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15942530661337126, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.37041873534869646, - "sentence_nr": 4 + "score": 0.5151997420421937, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26012602101891624, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.2627375617772967, - "sentence_nr": 4 + "score": 0.5371799694530636, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.14137375964454066, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4881276325562942, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.23558051670852123, - "sentence_nr": 4 + "score": 0.3280456612129034, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05156674665890638, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.14476982749981784, - "sentence_nr": 4 + "score": 0.36698218242626135, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", "metric": "chrf", - "score": 0.16632804710475912, - "sentence_nr": 4 + "score": 0.3430290995002961, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10615684540251687, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.13678452669387658, - "sentence_nr": 4 + "score": 0.460046617317305, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09952498083578393, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.6382466300772751, - "sentence_nr": 4 + "score": 0.3978595245805609, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "bleu", - "score": 0.1075467277016126, - "sentence_nr": 4 + "score": 0.10916808066739564, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "bcp_47": "pl", + "task": "translation_to", "metric": "chrf", - "score": 0.5012312009859288, - "sentence_nr": 4 + "score": 0.43671495255219495, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.4603854172427722, - "sentence_nr": 4 + "score": 0.413922696186207, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.34281202986923937, - "sentence_nr": 4 + "score": 0.38459672847260074, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", "metric": "chrf", - "score": 0.23213285024557784, - "sentence_nr": 4 + "score": 0.41318454544592675, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13839209880933745, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.5328062114240609, - "sentence_nr": 4 + "score": 0.35315147740153213, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14073835588074438, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.3993751732887897, - "sentence_nr": 4 + "score": 0.3186851468537677, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.11092528636668526, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.31604383600836816, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2209022359029063, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.5177301811811107, - "sentence_nr": 4 + "score": 0.5243023558160568, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27300740971755855, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.14189921489362475, - "sentence_nr": 4 + "score": 0.5381314258991404, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17139917523591472, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation_to", "metric": "chrf", - "score": 0.15971500975156616, - "sentence_nr": 4 + "score": 0.4864810046478068, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18174134858444663, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.44355652237335036, - "sentence_nr": 4 + "score": 0.48176247427646596, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3065887048692417, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.40736387061175394, - "sentence_nr": 4 + "score": 0.5157029274056539, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18604195434563767, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "bcp_47": "my", + "task": "translation_to", "metric": "chrf", - "score": 0.37489485923390314, - "sentence_nr": 4 + "score": 0.4122715353511428, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06195434067782697, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.47418667083462274, - "sentence_nr": 4 + "score": 0.16175760521413005, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1063765571906396, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.2564816085214212, - "sentence_nr": 4 + "score": 0.24881090381462032, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation_to", "metric": "chrf", - "score": 0.2767281181183261, - "sentence_nr": 4 + "score": 0.18900561821854683, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.45023339690464936, - "sentence_nr": 4 + "score": 0.2868359127518435, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.4487353880719661, - "sentence_nr": 4 + "score": 0.34494300151091684, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3626648377566182, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11552139626294972, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.29472525951124, - "sentence_nr": 4 + "score": 0.3398949301287284, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1659833448676102, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.17075562096098212, - "sentence_nr": 4 + "score": 0.3769282770836756, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20111668406029967, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation_to", "metric": "chrf", - "score": 0.3093797352942689, - "sentence_nr": 4 + "score": 0.4330800722857856, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1253868308634287, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.21421851674109063, - "sentence_nr": 4 + "score": 0.46617501074123047, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3130482255340303, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.15753286601971267, - "sentence_nr": 4 + "score": 0.6463371475277107, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14582974563534895, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "bcp_47": "uz", + "task": "translation_to", "metric": "chrf", - "score": 0.21413630439620454, - "sentence_nr": 4 + "score": 0.4561374068921452, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.18294404750126714, - "sentence_nr": 4 + "score": 0.39062260559157314, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11476846027014086, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.18265664536277676, - "sentence_nr": 4 + "score": 0.3716193833315997, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0990145402804052, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation_to", "metric": "chrf", - "score": 0.22528910215642392, - "sentence_nr": 4 + "score": 0.3832964227606499, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1103430168770977, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.43180333528957987, - "sentence_nr": 4 + "score": 0.3888835203143206, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0820956843607846, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.17635214465529284, - "sentence_nr": 4 + "score": 0.3672961979619499, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.09351434853192983, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3544533317605768, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1346186114956404, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.18870691281979324, - "sentence_nr": 4 + "score": 0.4525877580002142, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.18957700837099745, - "sentence_nr": 4 + "score": 0.3826135556435099, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09469433563240592, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation_to", "metric": "chrf", - "score": 0.20825973272491313, - "sentence_nr": 4 + "score": 0.42242458160008445, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.30354067465892703, - "sentence_nr": 4 + "score": 0.3003667779466027, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12988310144953827, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.4283147867664682, - "sentence_nr": 4 + "score": 0.44924139159069637, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "bcp_47": "ary", + "task": "translation_to", "metric": "chrf", - "score": 0.28583707879882797, - "sentence_nr": 4 + "score": 0.30212704629782045, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2850404597553743, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.2763272612915231, - "sentence_nr": 4 + "score": 0.5124976639796699, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15453491615716408, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.37329018470596154, - "sentence_nr": 4 + "score": 0.46885369942326544, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.29666196021313485, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation_to", "metric": "chrf", - "score": 0.3522470517026368, - "sentence_nr": 4 + "score": 0.5228686595434603, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.3800125473157396, - "sentence_nr": 4 + "score": 0.11148956644753055, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.4181721116054787, - "sentence_nr": 4 + "score": 0.11036624147130886, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation_to", "metric": "chrf", - "score": 0.30059912918058257, - "sentence_nr": 4 + "score": 0.07967090534466029, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1756638973945762, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.43580099202398337, - "sentence_nr": 4 + "score": 0.43710438313784017, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2237029203522163, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.24528802733610966, - "sentence_nr": 4 + "score": 0.43143618833956604, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12117155141304674, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation_to", "metric": "chrf", - "score": 0.3820163726862325, - "sentence_nr": 4 + "score": 0.4177794744541551, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24748635254040408, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.4802701406922108, - "sentence_nr": 4 + "score": 0.5863533707100985, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2588423886300811, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.490032576569998, - "sentence_nr": 4 + "score": 0.562682275218684, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23784181416667124, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "bcp_47": "ceb", + "task": "translation_to", "metric": "chrf", - "score": 0.47018722626716275, - "sentence_nr": 4 + "score": 0.5373128304085978, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1530279090066045, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.40669095468248206, - "sentence_nr": 4 + "score": 0.37271468092752974, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20113161707875454, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.3364967199973792, - "sentence_nr": 4 + "score": 0.4031608093713332, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation_to", "metric": "chrf", - "score": 0.3412793461743446, - "sentence_nr": 4 + "score": 0.305586228526799, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.2722589423069702, - "sentence_nr": 4 + "score": 0.09665585076298037, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.6457595781467534, - "sentence_nr": 4 + "score": 0.46061627151838797, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07358577950614774, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.2939031491424918, - "sentence_nr": 4 + "score": 0.3231011038063018, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation_to", "metric": "chrf", - "score": 0.3330112821010062, - "sentence_nr": 4 + "score": 0.30653028403316734, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.16467029855845897, - "sentence_nr": 4 + "score": 0.31954590001023825, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.6061986709228673, - "sentence_nr": 4 + "score": 0.569897649672018, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27483441916677864, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.3106079785428179, - "sentence_nr": 4 + "score": 0.6110044168996488, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.26585072553894457, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation_to", "metric": "chrf", - "score": 0.39468447173978904, - "sentence_nr": 4 + "score": 0.6103483251513802, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.2460260310809598, - "sentence_nr": 4 + "score": 0.3162413313359514, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.17283382641366998, - "sentence_nr": 4 + "score": 0.2590066624776712, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "bcp_47": "ne", + "task": "translation_to", "metric": "chrf", - "score": 0.20022065596322774, - "sentence_nr": 4 + "score": 0.24396774868005255, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07195213544257117, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.20681826954034505, - "sentence_nr": 4 + "score": 0.3120035251108586, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.16647457450300468, - "sentence_nr": 4 + "score": 0.2608300934746321, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation_to", "metric": "chrf", - "score": 0.1736099371373941, - "sentence_nr": 4 + "score": 0.23931793541759633, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.40074832458844545, - "sentence_nr": 4 + "score": 0.25267285423995856, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.30027816373416877, - "sentence_nr": 4 + "score": 0.38987700784253204, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23982170773883157, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10089967636688787, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.3455509477754168, - "sentence_nr": 4 + "score": 0.4751083796007961, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.15923219431794336, - "sentence_nr": 4 + "score": 0.40316339131278667, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation_to", "metric": "chrf", - "score": 0.19546100969052438, - "sentence_nr": 4 + "score": 0.2597280164114605, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18981964720217231, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.14697628025481496, - "sentence_nr": 4 + "score": 0.5202248700201121, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.24657376730321656, - "sentence_nr": 4 + "score": 0.48033951380023887, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "so", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05053600948320145, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "bcp_47": "so", + "task": "translation_to", "metric": "chrf", - "score": 0.291098552209934, - "sentence_nr": 4 + "score": 0.33779456449270456, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10743641120305437, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2514738930704131, - "sentence_nr": 4 + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation_to", + "metric": "chrf", + "score": 0.3861534614461426, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11282878483968255, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.2584734035489983, - "sentence_nr": 4 + "score": 0.42989143371398053, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1423452551536939, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation_to", "metric": "chrf", - "score": 0.16655213460140914, - "sentence_nr": 4 + "score": 0.35890833581327075, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2572958792096885, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.41031802646691806, - "sentence_nr": 4 + "score": 0.524791117806158, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.42323331934594827, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.3163845376082513, - "sentence_nr": 4 + "score": 0.5980458552441091, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18643966530029588, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation_to", "metric": "chrf", - "score": 0.04945189800447702, - "sentence_nr": 4 + "score": 0.472250969694742, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23051105230450533, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.3285364659042227, - "sentence_nr": 4 + "score": 0.461339165198002, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3332932475073326, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.19571523326731263, - "sentence_nr": 4 + "score": 0.5318071826870329, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12555000630534552, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation_to", "metric": "chrf", - "score": 0.19055236832274566, - "sentence_nr": 4 + "score": 0.37494030279780705, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11094349821042929, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.4263413781248326, - "sentence_nr": 4 + "score": 0.2906910636210368, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.4359367610984378, - "sentence_nr": 4 + "score": 0.3302669985131613, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10429663629438554, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "bcp_47": "km", + "task": "translation_to", "metric": "chrf", - "score": 0.3562570068438905, - "sentence_nr": 4 + "score": 0.2618510544761293, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.35876705764201644, - "sentence_nr": 4 + "score": 0.335832686231422, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.35593753263346334, - "sentence_nr": 4 + "score": 0.2087257243528052, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation_to", "metric": "chrf", - "score": 0.4429471433550604, - "sentence_nr": 4 + "score": 0.29242900341172495, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.4225308643688333, - "sentence_nr": 4 + "score": 0.16596275045554132, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.4438828875875134, - "sentence_nr": 4 + "score": 0.18375130705991488, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation_to", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.165036917351539, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.4438828875875134, - "sentence_nr": 4 + "score": 0.4238185315546578, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.3074300439616791, - "sentence_nr": 4 + "score": 0.37123872741984293, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05937938635402215, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation_to", "metric": "chrf", - "score": 0.36170303745193194, - "sentence_nr": 4 + "score": 0.2388282875974679, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23154237167901778, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.11383643766535269, - "sentence_nr": 4 + "score": 0.5238880458237484, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.24449972325632238, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.16579761015459532, - "sentence_nr": 4 + "score": 0.5510860378791704, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10987931098593881, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "bcp_47": "kk", + "task": "translation_to", "metric": "chrf", - "score": 0.191825135463227, - "sentence_nr": 4 + "score": 0.35341923833767647, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2106721715165928, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.15889147720402258, - "sentence_nr": 4 + "score": 0.3907583551227623, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2632572840179757, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.17501431522455, - "sentence_nr": 4 + "score": 0.5114029284363146, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.33060846075951006, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation_to", "metric": "chrf", - "score": 0.18389425700362821, - "sentence_nr": 4 + "score": 0.4933751565421874, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13269353024089545, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.15961789994114992, - "sentence_nr": 4 + "score": 0.44418557281661125, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11602404900806981, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.1802392930711122, - "sentence_nr": 4 + "score": 0.4554201930500723, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2209022359029063, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation_to", "metric": "chrf", - "score": 0.23426035903837622, - "sentence_nr": 4 + "score": 0.486429600270988, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1997226634456582, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.1705216477556015, - "sentence_nr": 4 + "score": 0.41104513420673316, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2428802468105601, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.1638149737231437, - "sentence_nr": 4 + "score": 0.46168046249055017, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.257115532412176, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation_to", "metric": "chrf", - "score": 0.18391384242077483, - "sentence_nr": 4 + "score": 0.42838270664314443, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.1655577687696921, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.503754492272343, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.2560013421711881, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.5334001391869971, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.2437911340271475, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "bcp_47": "el", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5216180636484092, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 + "score": 0.3516991073654955, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 + "score": 0.41790303792506495, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation_to", "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 + "score": 0.25390079010824235, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.011202719508723062, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0955315433384972, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 + "score": 0.39731738011391643, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.1297531338447625, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.41932803455959666, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.10591848049078299, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.49085304041008937, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.36356644720160713, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation_to", "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 + "score": 0.20108339482651794, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.156815032666708, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.23634008457609298, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "bcp_47": "wo", + "task": "translation_to", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.1614830616662192, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.2979511478422567, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.3438754918495763, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation_to", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.2895669129947285, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 + "score": 0.42338076501360705, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.06874519953789905, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.380714941951927, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation_to", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.3207893927453622, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.493823787340623, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.5242768575529999, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 + "score": 0.06554510293342969, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation_to", "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 + "score": 0.3359614697928464, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.2828367156737383, - "sentence_nr": 5 + "score": 0.08331902517941604, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.1121490311188424, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "bcp_47": "ti", + "task": "translation_to", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.0636893546858904, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.26613226165653087, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.4704357379534384, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.267579029762668, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.4771565913693486, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 + "score": 0.18602980983305786, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation_to", "metric": "chrf", - "score": 0.7513336773729535, - "sentence_nr": 5 + "score": 0.38203825355509946, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.4814564802258215, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.7621649608882223, - "sentence_nr": 5 + "score": 0.2184201291608897, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.24360280654917382, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation_to", "metric": "chrf", - "score": 0.7341375356694393, - "sentence_nr": 5 + "score": 0.2189734700985919, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.2404315522172745, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.49155714102395526, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3477250470582593, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7188419868243952, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6392900613840917, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6392900613840917, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.4386229919587297, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5379348324975908, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7703766110349561, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.22391522968021457, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6087618281135659, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4621757041594117, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.22067731046885494, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5635661737033422, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7689532399280165, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7773819133344605, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7338978299765546, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.24011079455637607, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.19920494035049138, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.614209720001149, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.5896613549548209, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7528914749586836, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.5300714512917181, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7461630750708693, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.33359103227594633, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.701102363286568, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.5300714512917181, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7461630750708693, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.5271017464925504, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7749613594649343, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4335364472118335, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.6878319610579101, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7032048786770096, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.24706467963183681, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4801289744823913, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6766690087429765, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.30421485886156485, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.566236392445952, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.32965129549221617, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.623436907204599, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.32078739729528816, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5817366082116868, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.44332438338421004, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3665134361137304, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6118771029352303, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5764325110247531, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5878575558111695, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.06088829927112382, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4100134571476398, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5856608401367807, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.17098323692758396, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5216877937894046, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3527295712700594, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6062826429226292, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.2799331151961311, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.6471892368478446, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8142499721936278, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.40202477345336673, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.7469480084357536, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.40157733283424196, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.7133166401137868, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8416888527493164, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.7494665344743727, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.42612283570374254, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.7185121839177114, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5199388279318895, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.23141570376732995, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5938624587877649, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5786592584609213, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23713320246552005, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6106842970161642, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5384773678665918, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5786592584609213, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.22128776529156546, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5609439249510223, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.1998573974138024, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.540043957078071, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3282518529729176, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6453010665294326, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.30752616970214336, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6051452460471443, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.310441435588881, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6413164971104282, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5814841210741494, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.22656720908801994, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5465750236858569, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6736142284622013, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6888365053466561, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.8656273480576243, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.25711386542134795, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.25711386542134795, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3416581331218724, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6578570934289981, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3423591961656694, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6570214418399444, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6888365053466561, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.8656273480576243, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.629934465484704, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6736142284622013, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.46965980060137014, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.629934465484704, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6935397252637394, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5069487414732323, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7801245319017357, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.6358974376699329, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.736661937085844, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.8416888527493164, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6935397252637394, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.30614023358320086, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5870676308171808, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2281399713503153, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6211104268881504, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7667541011433795, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.705252762035012, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.445107576642247, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6955301378913092, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.30752616970214336, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5976254557718147, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.40157733283424196, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6532350818978572, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6438225861756911, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.31374450602681464, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6422405832556486, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4924584878270648, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7062510642584722, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.36227557436010244, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6470050797908481, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.6383964846132485, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.8155153170229187, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.419468515826214, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6664000694648706, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.010321080079207262, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.06492787287290114, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4938015541936678, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7820348786317745, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.12858902882463452, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.35477908164501704, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6316031412228033, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7046532915279582, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7202697992734389, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7202697992734389, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.1943759862788499, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.49688103957939267, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7046532915279582, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7046532915279582, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7046532915279582, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.11970700565377682, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.23357697166633196, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5582260842665357, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.24363783193706642, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3903594390682207, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6662116837137958, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4727805712999679, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7717158158167359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.36816017035411847, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6630063658071765, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4335364472118335, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6966914157873363, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.39174440233850644, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6762795187534849, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4526810222444627, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7303764654257315, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3809666991864665, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4393160369685383, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7326708250282779, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.16449149670902838, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5337097549575721, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.6349495142258627, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.7749613594649343, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4719458927872361, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6863265729154345, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6990707992725005, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.43385612637937937, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6552557413442657, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4719458927872361, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6863265729154345, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.37973023491174585, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.727435179202121, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4598036015897535, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6256401299595566, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.43385612637937937, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6552557413442657, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.32084466348045076, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.22063120635885589, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.5852924591274146, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.31008822704072875, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.31008822704072875, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.31008822704072875, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3836374068673084, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.31802371065401513, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.31008822704072875, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.224188058954654, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5978847447208526, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.22894939325531252, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5747669845604989, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.27545321289806546, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6280000881172884, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.2680165156355779, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.545567244447617, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3086172473271798, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6217822674304354, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3495365897197661, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5973579837199989, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3086172473271798, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6217822674304354, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.36539169772085134, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3411488281065382, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6740035136770584, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.19920494035049138, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.614209720001149, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.7221847203387323, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.8931067231936596, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3416581331218724, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6578570934289981, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.92923260511913, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.23357697166633196, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6610479563844994, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.23374920560961487, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6381858968225665, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.38411167208361274, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7037084318891839, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.220294066346937, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6375628454216249, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.23357697166633196, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6610479563844994, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.8385943306861641, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.13737279171076758, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.42785667387454995, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17923344640485428, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5211683330085515, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7644556249154987, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7342525133793019, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4797543511401896, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7240781310560407, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7342525133793019, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7342525133793019, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.569529411820844, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.20323131695812172, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5371005942781321, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7143127337179475, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5679161104357995, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7564733289707379, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.2567770437062668, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.38457089506267517, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6582292681072595, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.14107526427034148, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.6401876410870359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.7526484951226097, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.5184341074271375, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.7295047041623038, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4252502464011162, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6774296788457803, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42643704825557327, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6730449758221991, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.5267476983756256, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.71821462156359, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4252502464011162, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6803639512204375, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42643704825557327, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6730449758221991, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.28648682864686603, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.543546241720005, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.20313747122261766, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5392632080295834, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.18623343474790552, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5348516130206653, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.24914989711092594, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3240220869485148, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5364140651922888, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4301823405286034, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4084622939366714, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.41786513699087335, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33032772118856274, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5637799127470854, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4301823405286034, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5468017145144113, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7519227909172003, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5420890779002704, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7268331815757023, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4790714250659131, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7010793195917541, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5420890779002704, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7268331815757023, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.617939643800199, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8356543644789964, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.617939643800199, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8356543644789964, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8416888527493164, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.3386854985606571, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.604413581883028, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.7663313999772253, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30042054271881197, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.27720938018510377, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.428047180290638, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30350690419450826, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.2813985981593422, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.28107488868712643, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3334615788010355, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.2770051233854291, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.42513375642407447, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3050638713235347, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5082087402765254, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3395533581184405, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30142704700265815, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.2773273497281852, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.29942074717273737, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.22847893469128855, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.6281881652405527, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7361567090943679, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.569133886912883, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6834516951654327, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.16807611261595506, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4597054186181326, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3343063479794574, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5429171669983389, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.2915369229944523, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.535395621261131, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.4504780990115136, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6386322492678208, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.4504780990115136, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6386322492678208, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1860962119549805, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.44847510774689797, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.623652672746999, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7064310568035931, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.21241088191397664, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.21511238963872098, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.48967538401421223, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.18951629567590744, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5515559648122452, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.27249745234058675, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.576487806400357, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.32078739729528816, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5779838399768712, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.32965129549221617, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5788023273137882, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.1860962119549805, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5438504570088443, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.19032892442937785, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5194565258434112, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.548958765126221, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7425459638873632, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.18437427949667837, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5211412954589442, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.18951629567590744, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5114358081515511, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3365047447281543, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5791325287918098, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.14598608091257087, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.4458625802506543, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.1860962119549805, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5438504570088443, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.25811803218589047, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.4814564802258215, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.3742893656007335, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7582803042224814, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.6316839256114659, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.8143078359179658, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.5069487414732323, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.8112065454752675, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7492834759166062, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.4814564802258215, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7621649608882223, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.15573964185427053, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3372949202573946, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.22894939325531252, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.6048598347770396, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.602867050301643, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.8176176657543648, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.5386695403411698, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.378882732439682, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.6841096204411963, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.2111187176080899, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.6020583416224236, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.6947687298202525, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.2534837513667069, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.17601203382268035, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.671938683171001, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.5206571060403834, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.689324258927, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.30344371233327844, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6219235056961488, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.21555378801920327, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5577976700241679, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.21030548059060677, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.41421927364643524, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6689624906287334, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.4063022828070774, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6789996206024372, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.3237833370387541, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5986110578496675, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.3957399456352439, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6505957913794083, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.11217219041746629, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.27571859863660825, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5218771218644234, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.4174441728660793, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6692136096184196, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.636016958488394, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.636016958488394, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.636016958488394, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.636016958488394, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.39811631946890474, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6320908834639722, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.4174441728660793, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6692136096184196, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.4174441728660793, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6706681340881337, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.4174441728660793, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6692136096184196, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.39811631946890474, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6320908834639722, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.17837875461384597, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.636016958488394, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.4794224895461657, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.17150296156301634, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.48812954881732445, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.46076979395163187, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.39000168645396877, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.35094536062899695, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.29898487912917937, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.2291182149355119, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.6293162592248092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.22848056414159593, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.5921402782211889, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.21511238963872098, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.5217348733264977, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.1513630224364002, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.32937492594263224, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.18879521773374403, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.4618333673677675, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.29623686353922923, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.41682189465797687, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.6573099561830166, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.2852636439147137, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5851048071392815, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.17636478563502966, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5283932773245016, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.4203546552244347, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.1196655750514248, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.29141398801197316, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.3143882918965084, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.6566540385253401, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.3088448141335011, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.7035462512447451, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.4186091892833126, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.6393114196475629, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.11856660123276004, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3311682798096144, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.3164257177669852, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5851860325042342, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.37494051432044967, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.661973437204244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.37494051432044967, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.661973437204244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.6358974376699329, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.736661937085844, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.4126152034907945, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.6941474239078328, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.6120737901860179, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.8083636300305905, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.6452772832060505, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.8169530087932871, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.5965623111029279, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.4878595420976541, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7843954055342302, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.43600387912116445, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.753502627596917, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.2091599003776314, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.6050299218248014, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.30407761511253945, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.14628563604185, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4777301300307737, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.29463458509790974, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.3659063107278196, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.2516441111691874, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.5379762757309059, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.445107576642247, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.6959585094274452, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.19803162353826262, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4896673252212308, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.1719815974592925, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.48509337647058, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.21542121044605517, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.2770051233854291, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.45226514916414134, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.14221479650735855, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.31177258041697303, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4558951086991579, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.26035572673286655, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.40109985662775005, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.28460812517661593, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.40547044606076843, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.2966090320349725, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4554891527388646, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.21044444652079192, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.31177258041697303, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4558951086991579, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.3109048971841926, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.46948666843707054, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.31177258041697303, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4558951086991579, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.06180170963975448, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.2966090320349725, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4407593221936027, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.4912131536580228, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.19018868394774802, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.5224363928471276, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.2989381657659374, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.22787958971339076, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.31471886527056153, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.3186225396765539, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.28648682864686603, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.5869807022024393, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.39047054966928285, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.35658220852248057, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.4755135386468395, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.22543269140466307, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.26195614303411313, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3052690053887312, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.37754323999245865, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6551391601089249, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.007047108999241661, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.3143882918965084, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6566540385253401, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.19075975291258387, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.46866709139162926, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7535887063318502, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.49779244057305255, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7564822254497499, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.445107576642247, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7263332833450973, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.3875407750115175, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.6320601493723194, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4814564802258215, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7621649608882223, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4441961115027302, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7565542718609186, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4441961115027302, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7565542718609186, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4441961115027302, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7565542718609186, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.022094354803669156, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.17001078098404226, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.5419100975160638, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.20731650338051813, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.30094298890378757, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.5463695830483137, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.4529852871970908, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.6379815839992429, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.5192080836782018, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.7354576496586976, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.2476165058078653, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4758373883319851, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.30702571862234085, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.5468678237231712, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.31620074377638474, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.5547605030697765, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.3570583512587401, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.6254475447872198, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.3164257177669852, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.5346911495697637, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.2680165156355778, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.49832559693883355, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.40505565245920605, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.4719458927872361, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.6884790828920573, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.8449079689944796, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.47229389414007084, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.7400562860667964, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.8449079689944796, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.44476089284108944, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.7117099802230009, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.464417424315424, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.8449079689944796, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.4652992071811419, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.8449079689944796, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.16024827804273534, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.5317530290531944, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.717128056256897, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.7219273458493682, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.44353395455270217, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.6913921626327173, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.445107576642247, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.6959585094274452, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.7219273458493682, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.717128056256897, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.717128056256897, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.717128056256897, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.717128056256897, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5420890779002704, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.7268331815757023, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.1754724247395998, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5107757383228504, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.5401725898595141, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.717128056256897, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.15821285888349254, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4716642229159947, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.39537383933343595, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.617311647158499, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4393606972268638, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.33359103227594633, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.600423959503607, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.1090009697802911, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4398327744078621, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.27330421266729565, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.2693466632631657, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5309206051118546, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.15821285888349254, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4716642229159947, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.44582080548137204, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.30335283306274363, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4283853203897149, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.19653306323688033, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.42643704825557327, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7385191646867102, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.6511785024442115, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.6511785024442115, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.3984098807009828, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.6511785024442115, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.39811631946890474, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.647088606333153, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.44542488150142195, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.684375153574237, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.44542488150142195, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.6856658569072438, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.39811631946890474, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.647088606333153, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7663313999772253, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.01886112664631915, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.4466679873664062, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.6799281948338153, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.31011575752288345, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.6452682411767686, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.5021277621795815, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.761461458169805, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.32393211943598493, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.6474115867020543, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.127408104603236, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.43993351395478764, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.09594785034023696, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.3865833291360058, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0810371533925042, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.31506451640481287, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.4126152034907945, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.6732486266096863, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.32393211943598493, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.6474115867020543, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.31011575752288345, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.5614133812306671, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.15462473462874404, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.1532685994792829, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.4662651599106109, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.1998573974138024, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.48166604565689325, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5155781222766946, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.14757581190431865, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.4573311375774372, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.15415064977510756, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.4576774423186101, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.4592064719908953, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.6719224520740146, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.309149936440332, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.1572663785778846, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5122325315328802, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5123550952856714, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.15415064977510756, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.426724812729464, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.3165967665056337, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.1504843536148922, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.45203030924244314, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.4391254859388873, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.7138345915744736, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.7663313999772253, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.28320384389628495, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.531318006400462, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.30643882011101126, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.3951500216160541, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.6335042145699192, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.33359103227594633, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.708644913877036, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.7032048786770096, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.6358974376699329, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.736661937085844, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.17059573701616795, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.4753746252238087, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.40783219447079366, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.21951524426618454, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.4774650578315169, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.1868514164295723, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.5483968819141473, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.18759202316167214, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.4754599799412878, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.7032048786770096, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.208795826063924, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.5361160056750558, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.13817790393734294, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.368011314076858, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.22063120635885589, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.5781205353252427, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.18107197870881736, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.514661439036253, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1813423031516851, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4972101263590737, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.18107197870881736, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.514661439036253, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.18107197870881736, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.514661439036253, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4724990991697275, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1860962119549805, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.49308679743240463, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.18107197870881736, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.514661439036253, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.18107197870881736, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.514661439036253, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1712473044894657, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4635173016830622, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1860962119549805, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.49308679743240463, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4724990991697275, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.1860962119549805, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.49308679743240463, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.19835441454182887, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6062730082124886, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.4074362040846933, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.7664523614495178, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.41443024325505773, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6339071977529499, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.6912804407652906, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.8416888527493164, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.31941303791548753, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.4444385005047057, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.4441961115027302, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.7565542718609186, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.23376909505556828, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.2281399713503153, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6211104268881504, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.2797830107070484, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.4727805712999679, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7717158158167359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.4727805712999679, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7717158158167359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.6030730571413818, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.8475480354796681, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.29945160623183903, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5546772816797799, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.4814564802258215, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7621649608882223, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.3423591961656694, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5502001052739403, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.15875722180934987, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.383354750306024, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.24769802565621082, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.6086092624563071, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.39670882908365773, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5348878791728369, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.27447938256311044, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5315032895817616, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.39670882908365773, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5409379877245147, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.39670882908365773, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5409379877245147, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.20124833529317487, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.44401287900537567, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.1719815974592925, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.36994072673675993, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.3426116434593994, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5133388823873302, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.17894177180728454, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.44133619978439725, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.16219748681741689, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.32210458788767854, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.16102642769112474, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.31256889728975074, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.1906936342773436, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.5160021246888273, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.2534743707366162, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.6254912096804822, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.36291227725384023, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.36291227725384023, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.28812859193424567, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.4255747984644291, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.2595151369628945, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.6384214365516487, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.465921984618579, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.21576146358278564, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.5550645714484712, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.23600051863022123, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3782353749787568, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.33713540983351536, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.6250009083207365, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.5186653964016543, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.6561896817871797, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.40562163465277223, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.27779711191658313, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.34011486844537747, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.21668238955829155, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.5367532631666345, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.769322145613854, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.27338866536239, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.5914573885612058, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.2297523682812302, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.5487323556475315, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.1570855113100852, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.21294973841939238, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.2922968824016215, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.3632703907932562, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.6409597524502569, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.7221847203387323, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8931067231936596, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.7513336773729535, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.319857965106966, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.5393982413372412, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.33359103227594633, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.708644913877036, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.33573064840973227, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7081054397334158, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.5263595737059831, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.33573064840973227, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7081054397334158, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.39022736644855677, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7405955935175441, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.2927057121559396, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6662552505924692, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.48740622698799413, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6794397309157819, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.39022736644855677, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7405955935175441, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.39022736644855677, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7405955935175441, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3070898761263382, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6756152855124968, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.39022736644855677, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7405955935175441, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.18759202316167214, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6184696220924114, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.18759202316167214, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.557445955724393, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.1375101316530452, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.5540102467708582, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.8012607361988002, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.1897992267368494, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.4726855583591889, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.35559255894860375, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.3109048971841926, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.5254124510546129, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.1860639131207794, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.7338978299765546, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.13352096115615372, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.4074362040846933, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.6464385241097694, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.7482524153102477, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.8447038922744422, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.5021277621795815, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6665605281744408, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.70066471582382, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.4922088386970059, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6106264390339488, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.32181275536083825, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.5021277621795815, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6982774997236794, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.7076534431960262, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.8413115375600476, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.4719458927872361, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6681602842119448, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.4922088386970059, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6106264390339488, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.42916222731145903, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.2567304004995466, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.5526271274789324, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.1821163528973126, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1983544145418289, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.4195908478809098, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.27970267298955453, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1378592993183041, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3752233237961983, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.22147622285255003, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1023857820560022, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.38421646372776175, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.254816209206472, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.5814444640902606, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1969221590285716, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.4139107793324548, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1733705613469748, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3993932385978296, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.23927943403430146, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.2036972232991139, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.32937303862037204, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.3124684968073947, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.29948848396607075, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.29901594860271813, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.15885824292629303, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.2396544472075596, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.3711366792786969, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.25913517321015245, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.28697807819754534, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.28653249812917597, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.22607786658046147, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.2296291837985481, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.4341999352730602, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.6745907228091957, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.1986589078880532, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5285168275193599, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.20110004903792847, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.45623478126637707, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.21555378801920327, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.47269072275515744, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.21063357946200129, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4560703538905584, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.2281399713503153, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5079006160677625, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.5300714512917181, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.7461630750708693, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3353598298584452, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.17265324947760644, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5397778205094209, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.21518838690610018, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5606804480411077, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.25026408934028455, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.4719458927872361, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.6534040262605951, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.1987777011513927, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.36857838224116973, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6856616009150279, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.3384653583738009, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6082869404281873, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.2476165058078653, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.47909493372494205, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.3384653583738009, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6082869404281873, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.3299895472527792, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6484644951902464, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.42612283570374254, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6716237521842675, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.44542488150142195, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6856658569072438, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.445107576642247, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6955301378913092, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.5420890779002704, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.7268331815757023, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4536218833151678, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.19835441454182887, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6062730082124886, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7107240028283889, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.4727805712999679, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7717158158167359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.44476089284108944, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6551098696198423, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.47207580389427084, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7434109160179552, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.44476089284108944, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6551098696198423, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.4464617303464354, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7384411540866627, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.44476089284108944, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6516368935552685, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.44542488150142195, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6856658569072438, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.47207580389427084, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7434109160179552, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.4464617303464354, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7384411540866627, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.4232354733407505, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7107240028283889, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.7334918117278213, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6544146882590995, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6544146882590995, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.41682189465797687, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.647688351711303, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4441961115027302, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7565542718609186, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.2869066874289222, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6076623179917158, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6544146882590995, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6544146882590995, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4186091892833126, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6584767887623714, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6544146882590995, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.6544146882590995, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4183007445500922, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.656180873465862, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.1817144072367102, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.5200176131748395, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.6838626312597372, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8481552379853444, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.6289868866690355, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8095082593395664, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.6289868866690355, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8095082593395664, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.6378379852740232, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.803154665668484, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.8805305626734038, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.4462689092414285, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.20323131695812172, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.5370679638669973, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.46829007045350673, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.1818483989940587, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.4705600829216706, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.43690976318544794, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.31929652405610903, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.47876989915933515, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3917533437213125, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3642922752206821, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.1712473044894657, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.426932196025089, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.23546056552871467, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.398575696616437, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.42359095518407164, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.23932595221309674, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4673115526141697, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.28613818387978673, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.5528031676146457, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.43481494774721463, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.22271767371139256, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.6553609623522636, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.8080381263652573, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.6313922341364886, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.8051546664747079, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.505242776482945, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.7265524593382774, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2027445624852463, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2514227030863834, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.5227131146872793, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.2046592065585361, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.5139378364418256, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.39020358281213624, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.40763281626399495, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.36781707614204445, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.36583359636400986, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.5261112461035825, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.4931587502890964, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.3421359311004187, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.5068657796437095, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.26700459848070734, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.45614973690855576, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.2661828424443392, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.165838472529457, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.38908651109487247, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.3337972903996398, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.26307356948037885, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.2079701729789175, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.2377025655787593, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.42203897403177737, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.28758906080679814, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.29881258170574665, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.3059460816807008, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.17717634270740748, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.25498213295426564, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.17411036809769512, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5409636216635109, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.4374960951307028, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.6840706293465405, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.16679551613797314, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4850274766865928, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.1943759862788499, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5338394442325974, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.1392908359945467, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.48531978068695414, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.11856660123276004, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.40773148598102293, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.6798017979514573, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.14411291670643006, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5000399749325595, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.16559113761114783, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5464808970807227, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.17411036809769512, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5409636216635109, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.37643606776410926, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.16559113761114783, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5030743954553002, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.3025029865727436, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5564009706295315, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.4651954337860559, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3764940106481337, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.2797412354706287, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5217401427389217, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.4763990880413316, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.28302740134070886, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.3014335251508215, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5726800490411352, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3631697646395501, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.47708987783257517, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.2725758492393828, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3923311316125708, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.4021117013686505, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.4152914707667959, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.36466819017308727, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.37043553303426646, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.41850774247348516, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.40740178389631576, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.3707717111254176, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.47519149773042846, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.3658006211440879, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.37421553597876317, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.22564262486685283, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.36926438076616647, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.15942178318590763, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.36039626112317097, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.5942499629418814, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.3078802898940204, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.3865320677199308, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.24505106440667512, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.1924244680058936, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.7773819133344605, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.3710784497353679, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.24192619393259787, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.4921318417839362, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.12478083711714635, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2863079147361709, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.3292010361291119, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6484221669130951, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.4246163317880344, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6675494539138593, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.278093559995945, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5759531667584591, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.7009885119411133, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.31573558123189943, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5918125947853188, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6188233920257146, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.7009885119411133, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.4466679873664062, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6702025696488597, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6755119791745777, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.47207580389427084, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6992480502085702, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.3191766011456815, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.13566979610140004, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.46773464768769135, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.2869208283752505, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.3243192696860874, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.2229529832462866, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.28619462359984627, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.4274420047831983, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.3601065525200447, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.4045961455348396, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.35271270311585035, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.33071231815127045, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.3225479310829689, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.2509956074597684, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.35813948389425215, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3390387389794623, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6170420596680538, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6279894552667558, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.551397074868541, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5403400891349619, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.37392149096896676, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6665214662145853, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5460240376042262, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.24343304284910333, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6275577931282961, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6431872581462166, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.4216890913810254, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6885217194158456, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6589376390020449, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6589376390020449, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.29176300840900793, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6143650111703199, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.43021236941942204, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7142896582178452, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.24090844358935917, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5468852870478801, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6744253146961531, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.39545121937832856, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6963801389253689, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4101479464529936, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7041976254287654, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.2919394073770869, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5957961314949175, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.17537670874647399, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4800889669735933, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.22845493240080628, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.584996891148118, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.2357664506880305, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6409280879253807, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.23272696712467975, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5794868721814046, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.38785611216800814, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6673259967761724, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.16331948281960493, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.350650198151987, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.8056920633274978, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8391519966182309, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.8020845125558708, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7971172820981081, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4207937380724192, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6985308026285912, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5700185304500285, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7945212279546889, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.43011383006801057, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7140577175386648, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3843363395779093, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7719180936906627, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6590438071804039, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7745649676018984, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.34636800712900173, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5167955767158704, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5397693417183738, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.425143650778693, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6674242019044293, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.421151249507493, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6938674571170766, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.41843795218458035, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6316283876832989, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.4803501444747088, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.7417101158248365, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6656008733100179, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.5124776602965491, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7722874800637285, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7994721822064033, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.38754077501151757, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.598503332887995, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3291598889023262, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6085546680624175, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3470839302425112, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.4845766087853281, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7138566289355139, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5512324461754572, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7289444696770301, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3737098172408067, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6832201170000932, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.21338748895376336, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6034116935803774, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.22436571657855092, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.61166969974579, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.22423870508323301, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6366515193698862, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8215788698315908, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8215788698315908, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.251696695878184, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6180491939580447, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5967384019266717, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8544348080833218, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.29170205300854224, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6498499527552988, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2719326877457978, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6002086362682414, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.29170205300854224, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6498499527552988, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.28592291256793106, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6102727682426059, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2774290545068997, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6397454944654261, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2748202507307579, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5810363959809548, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.693456244639743, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7121135616759211, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7395804946242599, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3684981984538114, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5606332518476288, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3694816688798906, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.1423071532720465, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5234276250101042, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.8020845125558708, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7395804946242599, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.38036178325786096, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20826058354833846, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5799650985757929, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.294467310498826, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4554141323944355, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7121135616759211, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3201911827891037, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7182383858693244, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4536404448264584, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5134477225657772, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.22831386795944372, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6930977635889574, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.25755472674357427, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7079787462750899, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.30041915229862387, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6689250750617529, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.25755472674357427, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7217376192850543, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3142765374520343, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7004749900624669, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4545091839935173, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.7166050399790445, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2919394073770869, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6265777781732258, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4345673759957651, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.703388118507387, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.43485418354574973, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.44797220217437844, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.24939081998882368, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.4812700337596407, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.7668482135865776, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3370129264673147, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.7096874943799061, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31771674795486515, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6550628376568252, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.12648351910430983, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.19910401453355991, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5815343547138478, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4952968469712617, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6384504056254413, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4201902477742268, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4741401979744739, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.191072229574376, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5901487703215178, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3800213082631731, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5676463425230758, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.44401360557563874, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.27587476896182844, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5801799655962208, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.2543881726648529, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3099293756712212, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3766019021279213, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7318674193893624, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.37489047453628294, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7155230965848066, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7807505267551733, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2697856975860103, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5736298373015629, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3766019021279213, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7318674193893624, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.20390514683548702, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6747066998707847, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.8131513745396886, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4909136024426773, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.8295116386418164, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5920266866634685, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3766019021279213, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7318674193893624, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5084550790849273, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.1832567180568652, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5727346150299959, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5595205105615875, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.8322210048001876, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.28592291256793106, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6102727682426059, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.3171094709345114, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7045234516083255, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6834837188844622, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6697898834930974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6934309279690296, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2453392175275486, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6569130291153491, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.20313747122261766, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5506087730896332, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.7815961723922495, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3763693611344683, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6360504215730572, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5760406199498378, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5760406199498378, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.14728212724124629, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.485741585706456, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1973212456326944, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4151043049244464, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3733543476417276, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.538395940979961, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.20763578034718042, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.46035934390642647, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3733543476417276, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.538395940979961, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3733543476417276, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.538395940979961, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33891487511850005, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5365882254723207, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3733543476417276, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.538395940979961, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23972125922151485, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5848344753614038, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.6734648419604768, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.7694606959147566, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8578928092681435, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9422733087334002, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8578928092681435, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9422733087334002, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.4929664394953523, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.6587225864765196, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5955978088638718, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8632174102523461, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.23487811400114963, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.35937816565888026, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.797323390576564, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.30041915229862387, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5110381669871915, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.6750223515189266, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.42818224355402373, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.667901678840575, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.2453392175275486, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5365332655663203, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.23972125922151485, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.48707827505552054, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.30041915229862387, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5110381669871915, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.30041915229862387, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5110381669871915, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5382940226742914, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.30041915229862387, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5110381669871915, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3677323079275383, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.22738612304909625, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.47454858661827737, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.4803501444747088, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7417101158248365, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.38687573986922297, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6514359547109982, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.42105372680687736, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7001171094008295, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.19910401453355991, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5338904589112099, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.24233572351352062, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5675102323575353, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4848137281002213, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.38785611216800814, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6213964982068823, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.38785611216800814, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6213964982068823, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.33425592140853283, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.8110307349404526, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.2767906930665974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.6946453530067933, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3865584077322271, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.7076640192892537, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.19547215688069816, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5978847581113598, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.5183282721440023, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.824367835388174, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7121135616759211, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7395804946242599, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.527528099078667, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4094709585736592, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.47384807927636907, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.5479565964904024, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.6532234058412462, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.6532234058412462, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3567823943323416, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.21576146358278564, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.5945881910966203, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.23972125922151485, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.6266330371317139, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7076271819674439, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7128603669502883, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.3584668928097086, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.634863098567942, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.3584668928097086, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.634863098567942, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7128603669502883, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.6896985035484708, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7076271819674439, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.1939280560840041, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.46312151331492984, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.7076271819674439, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.19415472735264994, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.41508997974031253, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.23972125922151485, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.6266330371317139, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.20390514683548702, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6152907875442002, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.42062888241722096, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6813469636986809, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.2600960555023324, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.19898107345153532, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5339708887761974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.4770332228554784, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3490251488234659, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.39545121937832856, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.6822216627082669, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.20313747122261766, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5634928669626099, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.17389434573554247, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.24233572351352062, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5561045459288251, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6294033705157869, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6452705345581219, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.1712473044894657, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.5272789142558241, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.2111187176080899, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.5086920944994741, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.15929050399664219, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.5414849269145706, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.4909101855057947, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.39545121937832856, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.6541357656856408, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.43281826407421803, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.4282924873829561, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.4056299814865685, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.20028107620075963, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.334851704167788, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.6474532635641537, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.28615556452106294, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.6299182889624744, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.31118041688292913, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.33464494273746426, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.5983196805551743, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.18961526642588783, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.334851704167788, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.6474532635641537, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.1974694070034893, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.4787974949414673, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.713332477096005, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.32952451615212436, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.4924875778629721, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.4021713045548922, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.1842384650009126, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.20313747122261766, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.4583712036944982, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.12962472880491877, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3044799424809889, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.39936742298908956, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.1641956652179752, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.2615311775021803, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5508394512324739, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.23972125922151485, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.5758647546570652, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.6576054208318073, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.4536404448264584, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.24001896226067918, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.5944702899865559, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.17979969665124504, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4918511602341556, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4837853350093983, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.41368954504257266, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.6422762292356853, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.3836841681691306, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.7127478995829692, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.20479056612936936, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.5251515188723699, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.24237768532177115, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.39469536234609737, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.5917048915180981, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.39469536234609737, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.5917048915180981, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.15604242268653643, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4751382685885456, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.230440974470398, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.4262626090774457, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.7058556376289643, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.31446071400663894, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.5931614744771728, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.1278241696265761, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.19319794288373768, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.45771966440001516, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.15824382329465247, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.4020801848996587, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.1835554260049945, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.4427324890847145, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.2491316630275714, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.2148547638367739, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.26506234837226944, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.19998549292703938, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.22686182598679874, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.5340286051317624, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.28799583290763703, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.24220427601736638, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.27250051496110134, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.21813681724512826, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.24062089463790082, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.20312835120509382, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4762668365393059, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.3407563025626974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6104226554223803, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.6407363191582277, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6966460917682386, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.6407363191582277, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6966460917682386, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3937848105507625, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.36684984164094486, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6366318617371836, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.16692486522015718, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4023647697112747, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.57359744419911, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.25270455578796175, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.571873510015388, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.33383285644152466, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.20312835120509382, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4762668365393059, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.2719326877457978, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.5963825614997932, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.5595205105615875, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.8322210048001876, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.5595205105615875, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.8322210048001876, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7211812032548905, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4812700337596407, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7668482135865776, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.7211812032548905, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.39898493411026575, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.6660257584377366, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.2719326877457978, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.5963825614997932, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.2731306427308864, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.2719326877457978, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.5963825614997932, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4770529960418919, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.49975293173596386, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.15083364266523736, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4907822977105627, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.15821285888349262, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.49028210447768544, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.14728212724124629, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.48638762628235294, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.45056319355400093, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.28597887157586055, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.6270330226583704, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.2415725261015974, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.5949018835911474, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.15821285888349262, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.49028210447768544, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4408750259635687, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4180191500256661, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.2668173065178967, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.5335299694016906, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.7980971476599384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.35818640176176625, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.723627810424739, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.3955812506211637, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.4745180734945151, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.43483587481573205, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.6723935384652386, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.27080524311589804, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5735629822442805, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.27080524311589804, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5735629822442805, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2765896733581188, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5826805982089127, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.21576146358278564, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5673560872668851, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.39545121937832856, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.636466558635705, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.21576146358278564, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5673560872668851, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.1842490992269057, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5113499757807896, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.2165768464503216, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5757840553675324, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.2912014808653287, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.6424610716762174, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.2165768464503216, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5757840553675324, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.39469536234609737, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.6657467951920233, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.2046592065585361, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5887122703216473, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.21576146358278564, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5673560872668851, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.34993707212869785, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.2165768464503216, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5757840553675324, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.2037792411904348, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.5548802330642336, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.7005713730032203, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.11385032360134208, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.4382795902467684, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.1365189729052536, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.4259340541380412, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2815871636550668, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.3806583469567467, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2691593314181093, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2742389123790289, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.44777955633393424, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.36659971468949054, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.3305228230404804, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.3993365662301727, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2941979168579534, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.16299446731288944, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.43649143020176306, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.2250861242438523, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5618434465935181, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.3238579233802238, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.6198368821194998, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.512336215207795, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.49345671324082974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.20312835120509382, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5573572048061965, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.314655516390602, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.3365822615578528, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.6591579540156445, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.20300292520931204, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5545072586157459, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.3365822615578528, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.6591579540156445, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.32570267192540586, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.19469940719627615, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.5244232343746598, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.8131857452490882, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.5124776602965491, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.6265447017943011, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.6730489965212471, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.7670434817254471, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.6730489965212471, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.7670434817254471, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.28592291256793106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.5686492116636237, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.2912014808653287, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.7275929939966964, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.8131857452490882, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.8131857452490882, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.8131857452490882, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.19511368322427836, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.5589893625764298, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.22714355926020957, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.5327568967360922, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.6807294776537712, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.39469536234609737, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.6937261271262425, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.60585154759089, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7547619819808454, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.4262626090774457, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7343467434735558, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.42902664419909115, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7443887915363598, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.4020760403449254, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7016962551122522, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.5342348049013494, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5424938760789326, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.8020816078177312, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.749948047540145, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.33042158593448145, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.4375968762068432, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.6590522929608883, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.4812700337596407, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.6942705518980387, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.42195777059677314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.6687475942312653, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.5989728676603553, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.48156738796358634, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.707316874318671, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.23972125922151485, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.5755240213917002, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.4812700337596407, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.7668482135865776, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2396991920464788, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.48156738796358634, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.7157788903059378, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.25522917707099674, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.6815801937310393, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7335705336375569, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.29715678881302643, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.661467129406907, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.29715678881302643, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.6509319807414574, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.29715678881302643, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.661467129406907, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.30752616970214336, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.6106236483676958, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.4464617303464354, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.7099628979634083, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.3843832649911012, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6360002062017179, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5575264207911254, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.38615059096335336, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6261010061605436, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.33965884450200445, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.582814803428267, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.22714355926020957, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6149327491870693, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.5890498835235906, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.8321239689935634, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6647046501418657, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.5577957421679061, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.6972259762695181, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.1917460913619136, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.42437073033751493, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.31912296554499103, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5467829654685376, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7096175474139502, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.2033897418920923, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.551556930942916, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.42062888241722096, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.6825498124526633, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.611788827244731, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.20313747122261766, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5346701852529732, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.21576146358278564, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5587530087616077, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.6896985035484708, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.36800882629132287, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.6460868517969176, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.7096175474139502, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.23707730131910096, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.2706380285588004, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5602052818707742, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.31771674795486515, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.6823632455739186, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.5397323593778651, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.8110662878512482, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.6656008733100179, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.2111187176080899, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.5647907462051993, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.4546795690250899, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.6656008733100179, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.5397323593778651, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.8110662878512482, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.5397323593778651, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.8110662878512482, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.3884085226314684, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.48156738796358634, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.7671994551643374, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.38918346804460413, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.21644311639014951, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.4550086560720594, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.33188010562448456, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.307502324647974, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3068845541987739, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.29264275195494416, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.24001896226067918, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.6014499104482237, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.2384726227721658, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.4820178233978107, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.44981557841577613, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.23509223658357026, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.14962848372546667, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.31921457459318575, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.2919394073770869, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.6265777781732258, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.40982224146042756, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.27080524311589804, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.45763886314510427, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.3521214014864166, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.29308590601052215, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.42902664419909115, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.7089330062523613, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.3086883400264, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.5960059844153068, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.23794506474388488, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.42902664419909115, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.7089330062523613, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.2567626980454705, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.2998634479378894, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.6247826968095733, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.48156738796358634, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.7671994551643374, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.7590994812356263, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.7590994812356263, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.3763693611344683, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.681475411202769, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.3884153333348233, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.43011383006801057, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.7441960090869769, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3113612721440885, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6244179228679348, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.19857943409196785, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4841638348150365, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.502026173233975, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.2573956940045279, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.5660567243461767, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.20229280648000492, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.5193630415443222, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3113612721440885, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6244179228679348, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3113612721440885, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6244179228679348, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3113612721440885, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6244179228679348, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3113612721440885, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.6244179228679348, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.208795826063924, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.47509890161874874, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.21992062963866632, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.5471220923935656, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.1427404270947385, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.3766019021279213, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.6426846682861654, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3767656346408826, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.37693555882757257, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.35816242771443213, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.22060794501194753, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.334851704167788, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.6009673735564677, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.27080524311589804, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.5251724178189929, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3975448812222411, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.2174536498549041, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.4945481209434918, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.33807764768133375, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.71426422535372, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.44701617851855957, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.8047183456557263, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.421151249507493, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.7602108728496834, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.24062718841066488, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6778014913685915, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.20617350508583818, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6739851297272836, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.24467340606531432, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6560671328641873, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.2917591430729611, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6844204996787111, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.3793086863337399, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.30041915229862387, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.6550573187445743, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.7392285437932827, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.3062859135460401, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.6540898825644205, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.7392285437932827, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.7301794230871377, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.484611284323379, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.48936469277309125, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.3763693611344683, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.47084569901172335, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3429043870200186, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.20207938879963666, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.5275433362317532, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.2965071539728828, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.5085503390295181, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.7251727471866002, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.4356925719771587, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.16246736614250729, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.38605159790728016, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.3026944877822123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.38390149148943287, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.4362508313532012, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.2600488816870883, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.17829987290849303, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.4191501080003414, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.3934356665260354, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.031442147565579066, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.25772455902514985, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.1969221590285716, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.5206656710605527, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.4079052344385883, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.3439550611757983, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.30752616970214336, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.662093020699087, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.29170205300854224, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5600853382301801, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5332286348751792, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3308959815150696, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.31327681146619374, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.6404873704225963, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.36684984164094486, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.6276674727087102, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.27105363860597637, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.43406631668987594, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.511075227027215, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5741842828404965, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5335784441425054, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5335784441425054, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.24062718841066488, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.6139570750776484, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.511075227027215, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5741842828404965, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.511075227027215, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.47980800108851346, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.511075227027215, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.452106591437223, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5741842828404965, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6529241277890402, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7410529316463808, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7638521785649908, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6529241277890402, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6503678865722725, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.38513414673376833, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7120077407246694, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6934914549971836, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6529241277890402, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6934914549971836, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.31771674795486515, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6036038206046929, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6529241277890402, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.73719964992947, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4797543511401896, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.7030838074817461, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4797543511401896, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.7053481527490161, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.73719964992947, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.73719964992947, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5967384019266717, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8544348080833218, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.73719964992947, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.46298522813477694, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.6897393951285803, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.5967384019266717, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.8544348080833218, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.5870764478159658, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.4328015276270854, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.7015297445241917, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7199991365237522, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.3084977337313932, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.615980419333811, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.435949382480739, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7673284019128814, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.702426996306173, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.17855149299161596, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.5203115480779714, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.17855149299161596, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.5366596515222662, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1832567180568652, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.46874652173038095, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.17855149299161596, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.5203115480779714, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1832567180568652, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.46874652173038095, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.17855149299161596, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.5203115480779714, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1832567180568652, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.46874652173038095, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1969221590285716, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.5276344273763174, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.18728674627858763, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.49857067709692, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1832567180568652, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.46874652173038095, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.14962848372546667, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.425496866339571, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1832567180568652, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.46874652173038095, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.5019033159973346, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3882810705699302, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3200586334957503, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2926675483598696, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2940727992972817, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.26051063874884706, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.3584077083565857, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.7394348668357312, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.28531713096063266, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2904734822892112, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3469947595749004, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2461344639192595, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2655620124722497, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.17020807300741128, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.3763693611344683, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.6360504215730572, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4909326710993637, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.32338428706911604, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.5840503541053488, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.2742556870386487, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.5231431994520171, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.24969367482838334, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.334851704167788, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.7234023926557539, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.5840809989792347, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4888708932434488, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2579180303636169, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.1414885045412184, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.20760470031302655, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.42791815571433417, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.413948387915005, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.6536628131390233, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.5391491945473402, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.703591887429203, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.3166072542829537, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.14962848372546667, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.320407667005801, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.2564868977542172, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.5954598909380219, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.8010685131009633, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.3479731564184223, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.6172522642259175, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.2719943818446656, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.32282138800401855, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.5801749060979678, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.27486480972020183, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.4670509248796425, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.17202650214787163, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.3503510714510492, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.39058393006987374, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.16261055653267345, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.3574935801968696, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.31315988574922216, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.2220992502530224, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.4000805406381997, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.37568878636171427, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.42123893181020194, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.260080077047301, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.32481036250266265, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.346072731154532, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.2932383433617197, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.1317331393528801, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.34289957530696186, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.16405809898807555, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.21644311639014951, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5575527454538532, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.24248545140243574, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5768340234336301, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.3793309425596856, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.2037792411904348, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.49504238714090304, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.21644311639014951, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5484899089483192, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.2046592065585361, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.544128595708324, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.2165768464503216, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5556668977066362, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.20390514683548702, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4993104339049491, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.2589731280621761, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.18413533063377066, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4955509874287835, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.39569555015790975, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.6841500930430788, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.45653838513939016, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5463837424085701, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.413948387915005, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5436043789950441, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.47323525740834854, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.45408876670111487, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3503570926151391, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.28592291256793106, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5235047096821839, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.6789177867237879, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.27413159457082675, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.28592291256793106, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5210087973470136, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.2076047003130265, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.5791447789263454, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.2821801681960571, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.5343486909870273, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.2823545141004295, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.4992160408903782, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.42781484820807203, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.3240069994352789, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.8084123599808738, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.5351783489396891, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.30238584075547453, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.5923011903684523, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.32280531478977453, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.2719326877457978, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.5105359942982793, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.19408883848117267, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.6457837185727413, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2764205123105664, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2905442260587855, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.3044316105248322, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.1693262946654562, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.3113002029497926, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.5780604477077254, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.29532213400892765, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.44922962827364366, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.14173543163061522, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.24993921017596432, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.48106412052016373, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5351693240792145, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4853530227174386, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5109316705796892, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5370788574666518, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.45798136636926595, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.18814785746917081, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5307880463310148, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5199261214674054, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.2583320817896072, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.36710489645199834, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5336198282523079, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5049142010220853, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5075366635951049, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5156030482282894, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.423135312287944, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5388253098847887, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17905278399134197, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37257295447029826, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12620429887108936, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.35580703793872603, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12872220631084524, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33602633953270183, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.03037224815656603, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3045613775157565, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5275070803493389, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.10203846572325131, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33381153680096753, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1685643537060726, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36926449644166065, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.014935758919429663, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08106107745254391, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.044304867337633724, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20806974344498103, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1418524086391329, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.38295770773758747, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.030860166165309233, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1100250143829584, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21255327712152144, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.43272151570555034, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.05918530850500025, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.26064517697298795, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5092206110218525, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1438459189500836, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30693371625402605, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0979038733644086, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30211704738953993, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2288990188897003, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.48933901443699584, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.009624974244068071, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.07318255686027669, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.043420474648595074, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2884095690753619, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.19074380068002203, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.40566585096277824, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.17382347640129553, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4061580777885601, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.10361854845420869, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.32774802711076473, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.15186969315425305, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3458120002305796, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.20031726728306523, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.24015860380810322, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.432284860519166, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.20485833586704885, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.468735805943922, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.192481383169461, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3799051443349615, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.01252735726099625, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.273148644463442, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.22381487678101888, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5249370100068887, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.09431297723472011, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3616856339096348, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.189717083187238, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.10266747466754884, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3364703638684802, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.28912109037408523, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.2586476022577052, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5379610020033071, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.2144604484498437, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.48894052224175993, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.21001173689943997, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.423493931076046, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.006232910970143225, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.06317168666869727, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.06938388878349923, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3541078046399395, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.10666682719585797, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.33462901494141756, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.14557808399334188, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.36598346755702993, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.06897533888461813, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2776666563000344, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.1665765483402476, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4017968725013381, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.04151505758906764, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.19356733603515675, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.39927676303776216, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.12189363728567917, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.37595660827287636, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.18926971577178767, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4931453714148122, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.004663531624960091, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.07262533604330305, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.037401300306846526, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.27395881217705964, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.20198948917565754, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.34858221035657466, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.16780109158842918, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3968694014697679, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1579497466001673, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5092928545844059, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.16286876096900815, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3422914837190449, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.06888992790640074, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.2874483621307283, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.2552422097218187, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.42162943894149857, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1631196072688366, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3502730667074754, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.21286836557101563, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.45055232014427626, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.031126201157905466, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.015970144454664378, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.06929847827527827, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.30185194035792856, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.22669486951066523, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4484451941575473, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.11697642623186386, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.37117753637984835, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.17621963873521423, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.09916146090364127, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.3121110160693956, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.011560595536104562, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.25846370764999194, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4472520901382737, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.07368089078790738, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.41452613113710224, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.17892846390928677, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.47088195615067674, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.012370537823050053, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.06660321132654005, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.09831093939330879, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.33203866499974327, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.09916009482330297, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3032928217006101, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.13805615693046389, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.40787998733941394, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.17643078314788999, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.40757584786696294, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.1989414239237112, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3791567776918788, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.09453698369211004, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.17848407049966333, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.37731466186079826, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.1995980198896431, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4244503391142409, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.18216362398065106, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.36524832602306334, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.08319287955437346, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.08383676689911676, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.2855329690010324, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.09478705591775652, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.33293232395887284, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.19148282873929853, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.4707949702068854, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.08183353655679478, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.25007633393249695, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0981642545874085, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.31793222329793575, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.11976209355757551, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.27004759126600675, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.18169212046427471, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.40815460354304234, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.09142555538569784, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.31371707771405133, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.05438497632520132, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.26123506271154656, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1490232164900303, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.43745835724045856, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.005606294971348417, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.06662245090541388, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.12752236829255797, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.14057105892389254, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3028381427383384, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.12157241570357182, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4080990097991491, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.10415298161056984, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.33452632923050557, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.21204239268527586, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3846197304420823, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.004763623056487517, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.07485928007606017, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.25599133172724897, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.47354758587475243, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.11689600237805012, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.38258301195690664, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.19809535837880818, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.447539350421338, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.004718557257042585, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.08019304349523304, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.05614653993259943, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.26485323792360876, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.13635319583999642, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.2850432830231861, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.16431887969160053, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4088971379214799, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11452508920842025, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3212742401272785, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.08218359452575877, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.23905391762860753, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.17673835621668263, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3902085179927465, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.051272222858601425, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.21925629669878902, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.26224701521976646, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.45901439168213753, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.20362195873137665, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4504603915919526, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.20927351091825444, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.41232284529686536, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.006488743008712295, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.056679733231823716, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.04209313835422283, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.26913406771501547, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.06070088845782673, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.2584364364927186, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15184278721506198, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4093399937921707, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15478222669012726, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3550584759508654, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.06244445123318812, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.28239834932587327, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15685632649880807, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.34378295878971765, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.06467646497347093, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.2374647159547877, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.25564222289599997, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4291463251432027, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.05675489168243481, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3098329822024127, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.14459834065375157, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4652483976219767, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.056417721736162135, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.14707146406788849, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.050577564370191244, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.26455598459911367, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.1054433514098504, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2840946641780818, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.16758563722627876, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4598125962895632, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.07875433150726119, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2638954513805452, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.12334630141873701, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.3570869171580578, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.19153195331287226, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4035796398628449, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.005649824351905227, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.09384599631616997, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2535787381720089, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.45789666197043016, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.1738582449442553, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.40161714405254456, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.16120676251405475, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.3934823211441987, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.006102253115653432, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.08614490649176082, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.05052791122570277, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.25244788085139286, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.12768613576122964, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3279857505284436, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.18041700926694673, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.43852448917973136, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.10734088848154077, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.33946796348247366, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.12499287263993265, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3031531068573407, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.14318317227039934, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.356756117753337, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.05915285533036862, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.24879847318007425, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4808459302114646, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.15720527174368754, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4715103005986015, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.1853793533058344, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.47839321418703307, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.21349841283886073, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.27204846616025496, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.16136987880724096, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.33626920748765377, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.15197436941722972, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.37271000364127155, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.17795920517030017, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.41862955401967455, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.17060644184287996, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.4054584763100862, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.17670199390439656, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.36682227371085463, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.05468777721214362, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.2495519218392036, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2706589648942988, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.45763107813926884, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2195372587354865, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3664303672465512, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.17524367912943578, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3908643084796051, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.018807992767181335, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.0887797545718027, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.06437840881729344, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.26576141148273813, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.13230039635238258, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3269392904147474, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20266988583156875, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.48179719155586864, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.19388048412249795, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.44361702376789247, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.051668546856871944, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.30087455074312014, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.07523788658275522, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.2771251147123664, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.11588911231177465, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.23827009172444413, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.21341277372071984, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.399326719551766, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.14947301772770566, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.30933819017694797, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.30287590293584354, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5604725285592409, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.004712897582765101, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.06896759630799948, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.05079616735013072, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.31593754046223704, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.17544176680792672, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3246583081139427, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.17337747588904887, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3602154895924569, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1237012344369667, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.33331866832253354, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.13636398180563136, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.28987182748753165, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.11209730709870733, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.31364540519664647, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.08369831431112969, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.2040584431589136, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3869788846445458, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1955903221252232, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.40982133788576824, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15417968758527056, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.37529479808790556, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.04130329986722028, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.041010356073949844, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.2740873282030685, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.09199306870423013, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.22163854171424513, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.20923298022634812, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4391664941823773, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.15589802574348086, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.37894206802233305, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.15697021945336284, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3714913924449907, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.10748431441036872, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3292908634814674, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.005478181714811864, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.10269142826924012, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.22493822179966638, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4356073090431114, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.12201642745653093, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.36024578318571476, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1107058214411635, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3732245061642063, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.039842290129399376, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.0904612219823137, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1049798504546962, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.35536862718638546, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.06089987261870556, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.2933161562815446, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.13303798096767047, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3647236067340644, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1948502778967486, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.35525815981538433, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.12370396553485627, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.36583052207842287, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2119411486498165, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.43506560274344996, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.08430389952515091, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.24884644138656709, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4890540554955454, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2024418414576267, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.356433349373201, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.26748241941426637, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4950459974606264, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.010814656004254549, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.0896887156447935, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.2679540690980116, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.14391826157279944, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3239832814361818, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1769688060281599, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.43047038034793145, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1618333627385132, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3458746996740858, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.17740552204949464, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.41741195239753426, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1442495030513253, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3320792016461631, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.10022078146902932, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.25434273119344186, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.20519952157280522, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.39166755112117607, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.15991026977564765, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3236465324693797, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.08233847062654806, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1668854639288255, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.42010321376888254, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.05135254464623785, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.10818248451466282, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3258308765385693, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.10027955093430833, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3650503321876689, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.171833798351082, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4265037420578645, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17393111207515277, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.39042812195808824, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.14437570687117765, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3403124594589958, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.20204254060396054, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.40631134079482684, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.020318277383243454, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2507351446416792, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4471643682678732, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.12853443501376743, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3781656409614192, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2009118875039034, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.48691094449139866, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.00517631148112868, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.06740002819965461, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.2452462470568165, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.15998665872195003, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.35681333217176553, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.22922072303609867, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5075702211165173, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.19064689695123957, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.36954921822756504, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.19591250914526429, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4012747453552514, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.14508364614975736, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.31907461937638537, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.07534587005281039, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.21764751288661083, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4696664542993714, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.24024632160930773, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4371716158103883, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.22508089265035264, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4850615114261917, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.11472913879621423, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0484266906744459, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.24913886287924247, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3141794892548087, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.11397867508647329, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4390501380282409, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.1785851272602057, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3800733399524004, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.10927209083864609, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3154924313217727, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.14399622191067446, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.40927634117587713, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.04825170735480719, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.2469647404663632, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.23918138501594022, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4342449335881247, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.13077065491742576, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3664151599144951, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0984296905675516, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3288790320536164, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.004333829482338306, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.08668716511436675, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.082669215590649, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.2792157437153376, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.20219794591777904, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4267163836239083, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.18851320324917495, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4118109845203767, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.20113943179758872, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5054929215592371, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.12371001489967776, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.35233081465372856, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.14334269972024394, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.31813833621829557, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.03336328330137746, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.2784134123049492, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.48776854891153376, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.14044205071392263, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.38712210510627154, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.27182145159884086, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.526070184366635, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.019129282930908934, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.07711027977872816, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0957971819913436, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.29031027439121476, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.16558784557611658, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4099467657500184, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.06638301361073934, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.33617826818768626, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.07088281524771703, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.1725752257112697, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.11414041086884202, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.35006273110713093, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.1313979824300913, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.349118229304567, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.22432434110392951, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3260751755857241, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4848127748471755, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.14067214182271884, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3683127018431368, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.261537968195518, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4670896511609081, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.009837503470338078, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.07381634573546925, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.040393483260585776, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.266056205197059, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.09337623404557584, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.255517984683644, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.23272041020266335, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5347837552430531, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11901413329120636, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2908877283991857, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.07103656838719773, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.27534420196130394, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.13329630208389306, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.326901495019388, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.08474505774044223, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.253832017325449, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.27776911552756844, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5207350087859894, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.17957474071770196, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3714895660565911, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.17878846497211381, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4805877835621217, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.021888464288139578, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.06880194424178804, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.07403099975997424, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.25305896973942904, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.11889226114628741, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.27239589447707985, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.15964995175974525, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3581305879558541, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.15593857496482408, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3832822126692406, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.03964626561854378, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.16338514690556735, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.056046675552729554, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.18841684806509754, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0103535778609182, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.1374348609613479, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.24530071385520955, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.39829793941181424, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.14834005339736556, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3650996903674865, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.19730454275995257, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4060233750197503, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.06643196929197938, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.08968389355416555, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26065548087048496, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1101341452220285, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.27185902677547247, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.14086598242600956, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.36137008859982034, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.21107720643690867, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.43911506176829573, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.03937709136327999, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.15616761711166294, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.12066885519467463, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2616326726997023, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.05438497632520132, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.22483031146316076, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.19772510321758924, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.363359576781242, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20352428331068037, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4163814361305384, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.152083233596389, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.307389858154115, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0019267822736030833, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2729231212296316, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.15873502699291203, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.37163016195847015, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.16084008820568224, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.49453446122836875, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.13827947882974537, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.4010585707766239, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.25209019490063744, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.21271493861618962, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.48032757836964046, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.13242778128862073, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.26030283597527587, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.49112782340858424, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.22218345206036327, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.531967559614025, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.24660313247404905, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.4750802403221604, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.008808482479470064, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.08172375369450574, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.027855314822211794, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.2743064672346355, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.1046814649445003, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.35929100435813716, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.1605078796467662, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3607284417441162, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.054452721416611755, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.17786113214625052, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.07394430141087438, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.308455636822965, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.1462174255670787, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.33477501662684966, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.06361340947543563, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.23606133878828675, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.28045674084265454, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4517321696983274, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.055923960513901805, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.19457014898752104, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.23365039523187425, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.42556791226379487, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.006313131313131313, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3270101704079343, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.12832094336767122, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.3205905925059277, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.25513503948815797, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.5313546358608554, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.13788203160207568, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.3107655646435926, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.1709984622318412, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.15045845033645844, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.35967967969652714, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.05137179048809052, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.2179007767607974, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.3089004315341498, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.48772563434127697, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.17526461622814685, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.3887795637263755, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.20843200459956857, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.42187376156085354, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.060410667564482795, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.04978921592425999, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.2697504713353615, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.11215442765734894, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.31037546676327293, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.1634286453435278, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3969874268891194, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.19418939219609221, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3901489832573322, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.048304077864023846, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.23970159686687842, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.20653927241974365, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3737641013916679, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.08226969152601427, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.15696885690938372, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.23244643124640743, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.4157655954514044, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.1872830229239533, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3677267744209934, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.13963559400140405, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.358561623108295, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.07846319110079712, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.11613581491070282, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.2960371403891785, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.10180741374280794, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.35728563956947634, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.1945423193070673, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.4706222268986097, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.2080824447945289, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.4427653693519822, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.096873931674983, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.357921862131455, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.11119610005454576, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.30792475044662354, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.07130512646301328, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.20474632477427873, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.46426130647037495, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.10226015509299118, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.40597878858272624, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.17770686403077657, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.48140657517707824, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.06004443343680894, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.2456391015238017, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.04398085473438986, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.2975413849030591, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.06928783103636403, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.28222804846457444, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.1334077033965181, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.3538599860218621, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.12768405545127823, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.27604666315815635, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0780295493314355, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.24395345082956324, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.08696621894928246, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.1961381404046578, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.006322585449419441, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.06296222300910888, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.19041407809791294, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.40450917856802315, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.07176020589506472, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.298641857962305, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.12417300961365357, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.34447336960471725, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.01552617838217617, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.09140076236425068, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.09225450507547597, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.25320206287790814, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.15282474172999858, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3332497402594901, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.15757286670204007, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3806769190392542, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.05441383188454176, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.24018232621879906, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.1426986810099998, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.31786120425004616, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.20406556965912795, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.37608957302795537, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.05462016386637322, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.23421911777634782, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.40751249438768894, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.18085489460790177, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3621070250317998, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.17617182067763734, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3491313268402431, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.07124457091987033, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.19364253421963298, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.35262250988466515, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.10490012364788723, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.33660463773341737, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.11899511888513169, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.4352142502284442, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.12579975046393374, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.404341788822506, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.08821147859816221, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.3359502995810114, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.06336359180003494, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.24215944474690457, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.2516489827410343, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.2340940710746067, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.483043611496241, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.14706390860967783, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.3214752326421373, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.2054747730587689, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.45289260702352463, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.01050247985807066, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.12711751466537588, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.26095912281725886, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.9087119657256413, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.9560455759500431, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.9619541754104973, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.9780227879750215, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.6915198173552017, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.8970348615016391, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.7141488866263325, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.779657632911011, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.5940828417346083, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.8140638933937222, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.2864952055011831, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.47749051793940483, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.7550771065896408, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.9560455759500431, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.5696209108744519, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.7189523456157609, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.7200916222527397, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.8631206418308938, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.130164515743941, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.34629605922390666, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.16831858516266504, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4055923540305375, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.10548435635950038, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3216875314121515, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.05430124534156916, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.19980447534063997, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.057343051243090276, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.2543177324262314, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.053828439630653514, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.17598984210619661, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.25623785429215434, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4677298196544807, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0725167639529624, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.29235386756607995, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.18224663601845273, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.42705463662730664, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.03011727853732026, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.01613385383938365, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.10556218836411393, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.2984252488377484, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.03878009660847357, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.18823804107102407, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.1048945915828233, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.2682365472098696, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.06705104400125081, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.10364481526694266, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.04876338988182848, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.1659640928468496, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.07896475900705206, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.2048108716476644, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.11105189309737737, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.1600171621099208, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.03852977679011041, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.10528935175350577, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.033289100271855616, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.01684375102771261, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.057124649943672015, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.12011237160584264, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.001959247648902821, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.03474890559758415, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.10986421427554392, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.14245697322261636, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.31264665723789214, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.11764506340202335, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.33221255414061107, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.03174814557417323, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.22677038600489324, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.1002259437998706, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3097950385521604, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.1639463523594731, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.35760502908172076, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.07274026865105182, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.17148554966021845, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3250663061067276, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.00987676385026907, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.06183822132049342, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.06896661690628482, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.25550275941289924, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.18966584330202407, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3677579424388819, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.008676004985440106, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.06213735883437277, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.054063951849895646, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.22471751395789294, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.11813127408984, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.33360638121733993, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.12452426344763672, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.33397340303561174, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.19483286033292496, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3818604583347797, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.15375876337917632, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.40619134928299927, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.18104835024815905, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3646278830106281, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.04114837516633565, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.24054900896535664, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.5113816914630019, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.1438005681117966, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.40809859931270137, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.1960515347006078, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3829188025031592, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.03388606284285434, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.03903463655378414, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.10483721246894277, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.29429054338289595, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.18262512815371146, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.3793372141796691, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.20118989409590474, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4568193159296443, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.1463728853883045, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.31232598846995213, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.19571475348790923, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.39920398184186273, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.14494034972679515, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.3591070798808878, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.06655131935655087, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.2586761509449017, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.24155574630199578, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.401555835867115, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.17968751167959493, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.3532466621095985, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.21212476816911802, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.45277145500626653, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.07535055465501972, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.04639980294552743, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.21165282585763617, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.18601317791265554, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.3622769931215066, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.10134296554489586, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.36352625085241486, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.056826643919713225, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2266491488847452, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.15537992498552838, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2883334105485886, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.28335103320234567, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.08456930872418965, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.19628672577369188, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.2015417872146417, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.43922925250217454, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.15957517702086002, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.40743897447570776, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.17592438916838554, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.44324094214107496, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.04460344454473387, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.05185187637560759, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.007193841255268015, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.07281943700425128, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.11586860285558973, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.34426859851631064, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.11537817464773759, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.3235971362772825, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.06766231174629671, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.2969445272858798, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.06136044368743127, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.28064577977026595, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.1270166435836513, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.3358025326946177, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.038084270974099985, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.2506085391461044, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2763478747844821, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.46667544697904584, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.22886082820773523, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.44299807792068047, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.18540761881407403, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.44065653401882693, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.008002390264131916, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.07894790558653815, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.050599660118273604, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.23384318295085793, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.15459585401418227, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.3893244765025937, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.14329939975046438, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4108923582650918, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.074972966125329, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.24626763305506796, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.11125675698572768, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.3396260733677202, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.10292237126920266, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.257640060451522, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0059334149311646755, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.08228836041742828, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.23674819725709428, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4112073942955352, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.11337534719923378, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.2994620506111809, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.16752333089459695, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4368242584300212, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.022171554761884083, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.07208407750039555, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.07917353810429117, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.28227859195431576, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.14039526843208108, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.32365712589934936, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.17760506260243636, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.3876268199476132, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.11133338686962291, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.35573855512815966, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.09619976321707868, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2711236080024818, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.19766937520219646, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.420372038303943, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.05903450822297854, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2340338200887042, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.19395070203502235, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.3842790377686493, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.17279545961951226, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.47435984974317186, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.2355775714536014, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.4751548693556032, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.07535773624684425, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2964854867084402, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.03901477466571775, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2037012862266554, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.06637842065802063, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.21920740076602796, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.1449164009012341, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.3465082189188072, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.10248481481009991, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.28479391150159855, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.07015085442074137, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.16316416553915214, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.17794585206155172, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.10515459794240459, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2595574477657192, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.19278662059992974, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.041456611364145475, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.20732523199036149, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0465445269655205, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.0737413158125885, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0417877035451695, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.25972967536309605, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.013978194191220837, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.14269295464324133, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.385707323503979, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.1637184523716508, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.4161934605653721, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.11145596509983458, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.2726799212917933, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.14188725539444494, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.3320570749305626, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.016098806505679464, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.0896538250999998, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.22297147454167018, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.430433050701162, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.07943825618517553, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.3357496873199587, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.19203703944580386, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.4598954047199655, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.04815454684943356, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.052921142368188685, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.04802396750487762, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.2260828104618917, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.10348510007232478, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.262284085504438, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.2592475459062113, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.4431845520770858, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.10490171797384476, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.4224966678718647, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.15853866673457936, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3919119546822024, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.10228767876406449, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.2563726374134224, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.06599446653043191, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.21965782403107392, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.27813241413261985, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.47379560516065056, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.1639110243114347, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3643799443374109, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.014207387844820368, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.15411273725684005, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.2147416276784343, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3929468618613229, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.05685645333743278, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.03987881150734634, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.23923990018919683, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.05551337802991313, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.17127810877064262, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.4177685028472229, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.21087258811486068, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.4182975936964002, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.12272317458390947, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.35138790489746474, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.18399721582637169, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.40091595611954, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.22790946436828668, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.261400078784685, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.4246430677980096, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.21299315183772102, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.4404655669054682, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.16495971085520575, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.41762478633248973, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.06059807419181428, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.09276333478547918, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.31791450398384064, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.06570335870921905, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2479447435027854, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.11560620039242474, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2786890063705584, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.09867858411809763, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.3225529873990059, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.09848268370198501, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.30734497874640454, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.09835335087704755, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.34005102180757063, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.04721429386335365, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.31075905542125126, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.5202913427171048, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0794619983812611, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2908372726398948, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.027719195525236065, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.13685668579971297, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.20470875192232219, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.42504796662718713, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.009098982345428752, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.08310894234642512, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.04045381290021945, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.1727288037490547, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.12370218124034608, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.3512013342182375, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.22396412213589117, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4759025637784212, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.15627978895992434, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.32774682416221296, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.2329029580518658, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.16942168323911655, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.348866040524313, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.10720803875144091, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.2299648095054087, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2687134687012366, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.5054807217503037, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.18957610315835438, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.44689148142047136, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2179258604235174, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.49494036869518365, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.010596646407804984, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.07677294813171638, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.04840317993719298, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.2527418671450915, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.22220265603625808, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3850042924274655, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.23925860034637106, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.4322103094292488, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.2340940710746067, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.41823616908031946, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.19992538617203864, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3997987536424768, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.10781100627978431, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.30457905210205677, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.10326096421189704, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.279108907377594, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.28062519500317484, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.48566527968707496, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.19970444443154955, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.4423950546113178, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.27409004101993995, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5284886834986656, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.08236287459432604, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.06513324878792714, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.2888842986482907, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.12790043463054807, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3251137272879362, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.20381173318192514, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.4374242147608937, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.1863636050757979, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.38327347200836553, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.2266753254734694, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3852115955641439, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.1499204981771791, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3722568874154844, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.14104469432134092, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.24275134970746923, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.22419461419741465, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.4592127766399572, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.19211812593555902, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.49977246377030776, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.18531976869758127, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.37963243794803875, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.01233876574258688, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.07802251833851097, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.06251587118034302, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.2652243792907702, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.13774451171759725, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.3392358805928605, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.10995304272182382, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.31516290141130093, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.153929566721589, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.3608257907466213, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.18480496206296063, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.36919372543608214, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.1730275917150875, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.3674830543699603, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.013515578278040412, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.117607119705924, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.20326791526595261, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.3619867553629671, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.020812790241140106, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.08148147559856574, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.14424669882788801, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.19255699527273906, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.38859820242385235, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.07808637566840695, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.07616370704276611, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.24809947007268568, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.09576088800276077, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3041295470213464, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.09587088737227797, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.2616214569856814, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.2623913464686138, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0964828084451256, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3032949665048769, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.14518717773336579, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.1838327121162663, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.410915242672095, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.08971305206542297, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3602440247006994, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.11918841281221274, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.29606868731001496, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.00580156080768459, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.0656391052988084, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.20131698736551876, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.09302909573829315, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.1930745826064226, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.22277635660975664, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.4138180006822518, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.12683453413302323, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.32324923216936663, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.10923883271644276, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.24764809403476246, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.13767897792280337, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.2518191271414786, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.07620016398847337, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.13944355344056922, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.18493395776181829, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.40795153012607455, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.13138966194745244, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.3198162160638907, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.06438908961410146, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.4167663768476074, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.013538368452335667, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.04880564940883189, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.10949933984953873, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.2483480632420486, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.10406813778823464, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.32304117956922, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.11433133814822793, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.376685707847211, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.16566832287055458, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.3899280134984665, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.12219090986337702, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.3731186900607078, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.10132612615353878, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.3332972208005526, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.05410002607065616, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.22982437342911913, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.5063070913117304, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.215820087371041, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.43525547120246666, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.1722310583472658, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.4259958309795401, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.005564453544831041, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.06864478605677955, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.06011462639386161, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.2847809875523831, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.16279177723066465, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4102368807414641, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.1418972100477343, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.35549463565231143, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.1340739087751559, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.29651992518206227, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.03946884506458087, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.21197738023216034, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.16868089464852234, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.34843578398752406, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.04783137935732651, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.2178243901049148, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.21864626600355053, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.43449793297089706, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.17247694128916682, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4371568532396031, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.09310860627754827, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.29711898112528934, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.23063365203643552, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4976085209077489, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.008193994475617358, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.06416871627119347, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.04977881551947034, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.21589989285424255, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.11739016262053983, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3243598686623998, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.12391818909971714, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.35690404186806807, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.1293238011288526, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.4229943657542825, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.11994392827207193, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.31204692452038146, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.22226034707641953, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.4087847602622323, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.07794901287174041, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.24352940914340515, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.24693288345135292, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.47256029886426165, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.12875681763458682, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3993394251736514, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.19652148611100978, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.4282480517889739, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0351824757158749, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.04596692032850378, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.13010283341659237, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3170762221054434, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.03625427565378279, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.07306837018670909, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.39671144662530883, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.160940602711541, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.42923242729167044, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.10754901074081791, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.4366819145999934, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.08724562123777481, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.41003179675795876, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.2316040185438685, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.14846392828893068, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.36181149063759965, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.1085914542989672, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.4077165098927507, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.4355810088704621, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.04925147838126391, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.2975513707144851, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.04819270896239086, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.262682816076646, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.11764942628273296, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3661118918972216, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1546502365264458, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.37662498358584207, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.07247369212693866, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.14796497932000655, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3628325212637828, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.04881966677808663, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.25763198229409756, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.23012496435016175, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.47948987011733224, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.14926399654342473, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.428199968972176, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.15248248205315285, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.44007880925740467, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.07284789180910362, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.2875325046198955, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.16053664506786586, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.05360431433749398, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.21840614866685698, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.146687038685289, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.0366752648089007, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.032273297160432, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.17740798450045847, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.03334273639438118, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.15056586105615624, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.26561679924158815, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.01793435395643411, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.03789062221672838, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.22901989299104952, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.03624533112227059, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.13708025640641405, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.10180668728147267, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3387919511268085, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.15878190990009447, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4717304128091511, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.165417615101112, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3723049467309275, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.13331291985681393, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3257699661757467, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.13692710494817806, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3787409988662885, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.013310536248432623, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.09174273696529235, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.2985451377718839, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.5482387728422947, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.08575430966320656, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3288420528759681, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.1531944282330758, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4851467956233017, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.01626757442125486, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.08310080113327457, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.03791800710695544, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.21909149207325135, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.14493605424503186, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3501358359880476, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.1270407903803617, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3551178453495837, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.08711637171502758, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.2954967025562741, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.16553550114612764, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3714847127120771, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.1391202023366056, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3301375941706545, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.07640690767869955, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3081247577756139, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.27425373620095284, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5028086616418246, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.19319119695627468, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.42057504262138584, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.15767660520106525, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4246819037324454, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.007158196458243923, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.09845213748343752, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3055875344104902, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.1502843121431226, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.390370937242866, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.2089790450123036, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.5020912846079854, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.1224795031512657, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.29049145213151384, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.13000733187642288, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.34493406005242266, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.11869174098838171, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.28873225679782916, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.06713856844474306, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.25037871529772476, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.27359525909456617, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.504913886563221, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.07024260582912173, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.3102655290838244, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.16352688052951744, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.47198435779029524, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.008423858606448082, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.07656193246033283, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.05416888177698682, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.33860046879314426, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.08810199114550488, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.25421876554719885, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.1723666822203382, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.4164179694728134, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.08174592824469444, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.28125186470074287, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0821491619656783, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.3276253201601609, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.13455263708735177, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.33030019783272807, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.005072422648731631, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.10225328551984775, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.24332752430414528, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.515325177740931, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.12045545488000142, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.4270379715218122, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.15558277244135618, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.47290283753021434, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.008715599257989518, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.07480070273160805, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.21138349896611064, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.10716023124329743, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.33305109264677923, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.16831858516266504, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.44925788158953217, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.06005500834321576, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.23500168157276313, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.264506771329212, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.155483560195194, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3577679330118475, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.05134602618173752, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.23119628922687724, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.21846037005111443, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.4323411521204525, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.07969115995241391, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.2886462965890724, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.07486383088392089, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.22899815715803162, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.17322793980422166, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3827655782429965, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.013967621194811793, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.090985176350426, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.13648611015535222, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.261359122933737, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.09532344847561978, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.27911273015721655, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1620442038279161, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.44671116906860114, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0808656459604844, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.31629942918298065, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.2156293226810056, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.101592292592622, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.27609002958205586, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.046672540599309545, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.24102097474536338, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.468594330157441, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1297242489488828, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.3868393915563033, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.17933350957537242, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.43699109907071665, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.011127070300895376, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.08758323198230493, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.28297301636963595, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.10362141065231315, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2976713869608838, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0535442755496515, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.32534949147415587, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.10502639606076236, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3443858948320673, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.10787346966969634, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.27908818408460184, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.11033135811581492, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.26224674138317167, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.04113753433682409, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.1528625963145929, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.1552004627188075, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3784281204778653, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.09593662885334717, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3241465595416165, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.10116506361132198, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.33164984003681125, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.007131140141430805, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.07998365456424056, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.056716976776784944, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.20621198166782573, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.20843904443769717, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.41987041749209575, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.16892311117272107, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.42944637897474097, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.09407649401384535, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.24845011725491462, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.1562430189793864, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.3707779359151589, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.20050198179023138, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.37497064056062096, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.11693769283709352, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.24807742053452883, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.34641276682746075, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4843161200028808, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.2691093449283904, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4584117553286684, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.12808121127467118, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2940358902223427, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.19341593401394422, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.45750754600729443, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0070813413245724545, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.03917953408872804, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.1085943935733644, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.3150869338737483, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.08853913752009362, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.3699120467510799, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.10565962599924915, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.3576984412786932, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.07750873793282746, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.42519667805364314, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.1993887445781869, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.11740286850698373, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.31504011454506725, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.029191792762547256, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.16799318698762442, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.36562698117819364, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.12155966905167627, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.3864314923842429, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.08288641431941539, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.36030774861919207, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.007498600494904601, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.0920325108581037, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.004179571578298041, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.07351837954322755, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.16306869473764823, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.09530685695259108, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.324860923353352, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.06261022269185519, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.16543452283536295, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.042425836212707804, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.1532311899154315, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.07314384162266307, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.17064167037500447, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.05263353785583368, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.10555576819736473, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.08870141712804926, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.2956750528912434, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.04316074561911492, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.12179766740107663, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.04960414486693654, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.15447083970732592, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.003598209291387237, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.053420519861741254, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.17991947799734828, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.10417702105111015, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.2840081496768262, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.22195513186933313, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.41825817264037385, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.10512531898680018, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.3280524648390448, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.13250044494118035, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.27178201004990943, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.1683751554206474, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.39066705722063644, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.010968538854373978, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.10609012911479784, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.1814330257149915, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.34300085767735583, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.12977918792751478, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.351708163232579, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.13462044240543036, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.38599010799499406, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.09645717513252113, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.05840767670858235, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.29437564634854996, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.06549365852999947, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.19948256778755252, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.33171820256493456, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.5317418755857823, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.13801294746633136, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3369282304053488, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.09320127430844248, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.2411795889403124, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.1383681036030409, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.349504676054034, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.04693215248462417, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.13798005261606958, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.2358588586070764, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.49341195538644056, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.12439394521251071, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3794511361786166, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.05226233169864726, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.2706356884347931, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.17905278399134197, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.435154701956904, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.01158212560913509, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.10215743793929438, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.06927836159710253, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.28632181397023265, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.05045310628409221, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.26506847806653416, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.1177915530693848, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.44418786085508993, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.146547094309055, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.36840754531694414, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.04532894170735517, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.26844337122586925, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.09624394213587038, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.2754326129166975, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.04595467899356206, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.2060042918766775, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.22263404925684163, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.43676191178353707, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.09537286664645776, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.27089796392151044, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.4004950959515481, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.013176168036187562, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.05448980490008836, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.010248885552626243, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.11174990539960772, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.055310713185595, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.25742908684385607, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.20576246677399845, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.3789603596807018, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.07528967210822063, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2659777826374339, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.04876338988182848, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.25024446120257093, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2811165646467243, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.08119930104030995, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.22749421384497187, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.2308676152532237, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.45501496824447146, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0559808324315414, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.22623967162682507, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2487940867712376, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.12980630299792445, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.30447471429985073, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.005458377934582915, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.051109671088680884, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.04977881551947034, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.20431472158182293, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.17509809383946048, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.386834770913833, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.16322400014183205, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.47216572692709596, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.13050295514132168, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.37331190108047335, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.08956508021078231, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.2831884743240396, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.10389284801515357, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.37748572050737733, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.010386843829287925, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.06479288155356677, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.23300319315350754, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4491554038721914, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.14673461496683476, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.399781941109151, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.08038264410981451, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.25459802441879237, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.27240647173555915, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.5364888216436483, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.013738103741112312, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.08271100002634647, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.04550552601380896, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.2881361840023623, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.05116952807627418, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.10314036721569257, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.24963395554422432, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.043321633865040066, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.1541835268049213, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.12155067150546772, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.30434034369814117, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.10171104705932635, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.2838287277443614, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.04322037497124764, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.13973249426000348, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.1909849699402812, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.35574761271391786, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.12335871645688117, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.35359462961003574, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.05039339928845027, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.268359380285868, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0032325239534376927, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.05217514663240744, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.050132348463440425, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.19059644113563182, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.598931508663349, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7353063745802827, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1327526847508867, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37850602486495205, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18405035438430847, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4142901090120915, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24239458593560292, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.32069132319909655, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3857436691295343, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5750224388123065, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22478613858269392, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.44348101018104913, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4026159305424288, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5712560131047175, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.183687049781416, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.351911486970854, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5181825846579515, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4262221594184117, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5886657414856064, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.25861130592298187, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.39452644092432093, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20379250618355427, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41085414309816914, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2990226215771518, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4323734152924571, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3621517589760531, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5866873582151947, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3012789660952507, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17248715680799764, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.40043565243219187, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13835317113453516, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16343842313572918, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3986641525285075, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3315037521841549, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.468197879470805, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5792139686527714, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.25798723088167685, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5244854229988815, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.460474309246715, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6213537794704693, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5676965183365866, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.003172770121174655, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5834549494301647, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.22218130727359342, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.39929356245904674, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.46092611919700416, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6365915338629015, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3344305108778801, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.37017501464955627, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.3194331635465395, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5031092445628172, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.2974074484950165, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.516333943378855, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.334422418242443, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.47577086062579566, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.4482907809719588, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5498272118133005, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.02467424260792568, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.21902340561392236, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.43906671679239717, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.26538706048179084, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4982627378595717, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5234484809182233, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6658297773613274, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.30007504691018483, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5153810823423555, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.14074957769288798, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3750035199199742, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.232738415750697, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.49618971681248764, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.3716332023564544, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5698425037615289, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.3488611533620711, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5550499651473632, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.31883477089875656, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5510450101159524, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.038236956722392024, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.31998097041178836, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.15161074985415177, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3796830006266126, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.3815250264738168, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6516314751979607, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.189902924205034, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4072184389907138, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.18710260593933364, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.44334313717706003, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.07757069009917116, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3253161209971999, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.20292918891121983, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.43895593415558826, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.22168992033645996, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.523689661176845, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.2722704374402053, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.596004654894533, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.01008902035184167, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.10279947040838337, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3569840483632983, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.18679710353734788, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.3876457319870774, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.40003810431098236, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5899097408105687, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.2097387761551816, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.49663301508497226, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.27067168022307464, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5439625482235064, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.2826204057042236, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5043062352893725, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3901529878372595, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5914927912573461, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.29588994069727786, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5527117669081858, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.46732353406180216, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.6059276585345114, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.06266083709457643, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.36565527196849945, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4882803186347697, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.21812881407613688, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3598346059855135, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.38047531731529327, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.49485723102957346, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4672309378181727, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.29886658673327365, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.1367498402979849, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3943841419148219, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.22848523472845572, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.47155340591403294, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.5676006714726635, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6880701448812352, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2974074484950165, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5121581247515657, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.010162846529607748, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.21947959999379651, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3226457008913864, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.24586918158076287, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.4658595745396681, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4489235959690452, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5934678825154104, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.28800869328515505, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.49348678623542436, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2429163097293302, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5044329486461447, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.23073085454808062, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.44142087654422146, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.30167234272902427, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.543245170223285, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4406612884550454, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5676112112992767, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4476950425126913, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5932980209045412, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.1483315516064897, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.25249051585915977, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.38558450790399557, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.0067104198717751464, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.9025232868361638, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9169897590736298, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.9709835434146469, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9951728990866464, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.8935248372106969, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9404428602061264, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21039673882735752, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3872019296036794, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3642482472579296, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5342538783335161, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.18831933500600306, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4318025704181776, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.4439623527529193, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5309137918519957, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.2628849077177109, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.17879309995151985, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.2568045428196672, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.30815875749045163, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4611100423417512, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.6099084961389527, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.658015760514539, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.2464380578618272, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4278399263644655, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1879604201975219, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11956615218925931, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3305337714496588, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3395693620772222, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.28406136898728457, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5649283064490618, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21544027588567594, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5040038440508637, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.28552127890094825, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.16448947606185552, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.11823053204772466, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.24615921057796505, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3931991982536581, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5762809938404015, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.3491726680217181, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4946434087697324, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.22916123454514536, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4192305796685782, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.03586767012087445, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.10393938326032184, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.33864898055191395, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.22556860731509948, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4747086049005634, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.23843418577408987, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4082320855803597, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.26970223719007375, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5172978597562362, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2372622545962587, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.49004864454711367, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.190140357671548, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.23647235972003527, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.41650969469918997, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.25863626048999067, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.501623272099173, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.42662911848025076, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5800596652250789, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.39420326688847324, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6371076304605184, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.030501743754356173, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.19352792845274666, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.3925864519770825, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.25449674462950855, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.44805409822643144, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.49458876622696707, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6179893617801274, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.30630098078522544, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5439056051092116, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.3059872016765634, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5499457869553984, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.20076347441707354, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.49625515445592083, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.18665948437666813, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4437597552815582, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.3424885275916587, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.568309937115554, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.48670274592792, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6717971788322309, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.08002354055277362, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.417372155782838, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5043502592801646, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.06680433144407034, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.2919280798407827, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4076170046499833, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.3312570339636223, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.45442661484375735, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.41520313827696, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6485212540886613, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.19850842371858787, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.43584341835040474, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.284161309400485, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5096201523229312, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.1134451991138546, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.4186167762559285, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.24088562704853508, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3796021685415706, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.34401346933337906, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5495898550305974, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.43874832905672956, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5882858748700781, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.26652403565303173, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3270207865532903, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.006769280526888359, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.35465713644381464, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.4911561718424494, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.22744906705116497, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.37233302529431345, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5112697990822607, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20170335119323748, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3541251997977811, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20170335119323748, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3885781003640365, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.19139378056089276, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4390566098420477, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.20533223830207933, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.41944354523254873, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6003681413895097, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.28392242834976933, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5447934365522582, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.21644298243944068, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.39638009285990555, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.0013316837154984055, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.16192940337079562, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.2536107727939302, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.16542259679471108, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.26975832150444645, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3857901147929391, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4926358895461277, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3182774828667731, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1214053825777097, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.30188342402741686, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.19420534060688366, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3642322841308566, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.307338019036143, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.260711748598298, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4104518639082048, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.34848192645275233, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5640398262082806, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.2834052290575623, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3513608621054937, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.008037190855333259, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.09960206740894453, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.23680099011195122, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2831988281847858, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.16885023000999705, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3897135623573608, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.43975656978777905, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.21883928293594496, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4140411856219547, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.14270596284245182, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2812419410574613, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.08966592262979808, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.15649677978231225, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.31280763439438314, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.518277214190325, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.39448834349896583, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.5955979870734485, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.38006094126945456, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4952333573221428, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.08451648022462464, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1919704825961155, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3465066144910527, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.21877512875558908, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.46843499948065653, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3325026294099889, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.45805631044287126, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.23530033724858213, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.46208607300298377, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.2936164862319274, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.20512476044697742, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.41887716212519804, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3001327916132617, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3729157997624686, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2329218075757412, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.46557005506050453, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.364475284252601, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5303078856093503, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.40570535345933584, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5128999889612808, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.03788280195139692, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.18426708758406696, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.28632666167603604, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1759696284842668, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.40989005404786566, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.4824794737945071, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6479459161283603, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.37284027455688556, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5528347504734102, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.2699951684630893, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.48283570465443887, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.37458689678657137, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5282052116558258, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.18272411487051296, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.31610070189880046, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.38358255514637035, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5669740223554237, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.5368572660911874, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6191297828676535, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.17472341341804662, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.524685968336986, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6168947391572741, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.007563050858412507, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3245592826703331, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4961612243992949, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.14722675403683808, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.28846790344058515, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4656455050518963, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5477103600632085, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.38846174119508314, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.13497849469044018, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.294518464464907, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.09207365845406566, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.2745662608288741, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.21555651352032254, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3334657398473605, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.48862358384725524, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3325507240998139, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4074972493988868, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.35372399264817345, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4415325921468054, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.17383350569405645, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18154339087127422, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.32445288009194484, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.2542828011834812, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4695941026465371, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.485644095022506, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5923993450097689, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.15487293534817623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.39293494862736383, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.2746536544630471, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5102501783603998, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.18154339087127422, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3913469689541562, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.33281148468111865, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4458341777155295, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.286122396488242, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.48996353650153573, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.3876366843353173, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.49788350946214466, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.43024400109694355, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5419412911283815, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.00801785790731701, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.09885362316286796, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.2922887728653336, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.19659284558894802, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3439604955527307, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3150760288937462, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4551575101683354, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.21741853044139284, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3535910166292039, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.22171131219306292, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3447082086936202, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2006070365475092, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.35578462620981843, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.21812881407613688, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.30905257672100556, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2614728375659921, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4319733175313442, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.07757069009917116, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.349379547582856, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.30548599245666574, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4679798467191344, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.014790009967292654, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.1352102459252932, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.32096940912916827, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.5046613014990851, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6377969619576389, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.6485902560215636, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.7627201392474565, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33626819961829335, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5466581859383387, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.4947540498519851, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6678485036966576, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.13753714471937797, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.47439937147594774, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.5037566723027213, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6887059730011686, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.5401659537103226, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6799874350995705, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.5639943531321595, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6640560919035684, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.6878626650756018, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.8027840472006857, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.009344129172007202, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.25070100037410625, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.41914475317559957, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.20326213373677707, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4211311971791892, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.38713346831820944, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5376493568188783, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.32000331642122953, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5480591855923784, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.20926433421787555, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4046472012888725, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.15758218479424427, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3026681380066168, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4321096865782781, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.30296887338419454, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5234904895800095, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4096636841225722, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5278686169310903, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.08419519560443713, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.24974396789011238, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.29040471938315554, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4617994565642428, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.16484140461834484, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.21181203648983093, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4245081340557523, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.12890680068769322, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.2478022357548686, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.49124012500448727, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.21132630077912357, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4175670766052166, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.22554684328666952, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4891635965943922, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.1370370495710889, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.08013992490936363, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.1552012946702491, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.293968634178725, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5129012262464672, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.30027814350488985, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4842037100625574, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.20881729496822948, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3442652041844856, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.14470626861490504, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2566358039205211, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.14440270272056518, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3829771215415724, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.5543498698280007, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.7016802877815009, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.13108369255325433, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3929302741911199, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.36484904083194636, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.568282580396246, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.17997291109158148, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.41721274496653427, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.16912873274521933, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4587620543754354, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.4036650481496061, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5988798086699901, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.17864866390812006, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4085144222648808, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.4333488014632613, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5965589158023283, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.009743563745649522, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.31745891481127153, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1667112120846934, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.30913125513655043, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.13203823352287472, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.28280767234695003, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20174045447955946, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.33729298835089516, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1667112120846934, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.3259291852615986, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.15756751766261828, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.32067005734881104, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1716931385864913, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.3193195680733763, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1948024190276591, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.19454290935168922, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.40563779058474464, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20365290006260964, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4327934810550205, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.12475528808004005, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.23623824056754247, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.20455275179869584, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.47815735761186096, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.37420316460821246, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.6481907872475802, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.10878661088699644, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.4101850467281004, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.14636985946104297, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.42477479000228696, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.13490983794909628, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.3721166387799897, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.24199061099369143, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.43083710197985886, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3821120008009713, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5899756751768851, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.33904091445927403, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5149735242342894, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.06556262572922589, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3074762344614454, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5504296088375491, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.009755969152501744, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.26351629170506197, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.2280725846401638, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.32365707034585395, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4584149294578286, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.13894362470892055, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.35710461878741834, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.13763666698607552, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.35163868522447556, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.24113982759255023, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.22171463088948726, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.2988062022268337, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4421623642899925, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.34420876516527255, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.5068918436131059, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.31461500509304885, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4233526729436835, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.004693571154544651, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.23316243238650552, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.18945235333331134, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.3443950901432381, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.26637160423927314, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4646735366228476, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.19585063466021865, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.37259242197674974, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.20191019088262566, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.3773476047938683, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.15487634919018395, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.40723956344919005, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.10067881517323463, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.3394719609522645, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.3311961105131967, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.5192887771709966, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.24263763794008045, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.45834072271970505, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.32018866449570155, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4142777372959173, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.008115464234465965, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.17226307014894685, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.2929617403109482, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.1581734375963556, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.29855796123914957, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.41749084544527715, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5512835123193889, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.2661644792674617, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.46546633658762687, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.21203547746686083, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.45610528927175875, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.08644237346462266, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.32003978062314414, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.1602999168643203, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.3213856523369838, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.5227070077068928, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.31148557892944495, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3867464805058363, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.3688189810109343, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.4691915858995521, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.0465791689548439, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.11175354227446688, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3298241713743613, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.16832254701348195, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.34172735320777375, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.39245477087067665, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.5302932215753233, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.17171159782066198, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.4250539223489113, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.1718526210271666, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.355144746174474, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.18597468573870948, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.3585545206106179, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.2543269102041851, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.45657979346800803, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.27593704297838784, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.4517230506106012, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.3203505804334101, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.5201747871509939, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.18367196864850696, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.36528750965938445, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.1642384866095333, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.4097157364333445, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.4592265081063172, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.1567864814929981, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.29896910170538116, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.41423657620328247, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.1524830877429947, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.20357681430297922, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.14691264038600516, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.09802862511942351, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.18018110949421698, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.23996024992906517, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.3763392895892706, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.1414355019095281, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.3460029262848581, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.08747015602038587, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.05963570658750385, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.2115339554327366, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.4102495208939548, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.6896260480312464, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.47475604110292025, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.6309823582859546, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.2685172542195998, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5385038880406502, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.25284666735526534, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.48906659909069483, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.21208798872596596, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.44139630621320486, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.13801642938000075, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.5926964880100889, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.7449340020430956, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.3537581250657245, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5377035718944764, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.00615279366159919, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.006004997316698418, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.33636994463223036, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.23511486401816076, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.4879376273715227, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.46676620029268096, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.5960578086281888, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.21011665246752942, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.5022122132107005, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.2372622545962587, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.4647111452373025, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.19849167569028262, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.12206082504968152, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.2550055792545815, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.3821120008009713, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.5766197400237346, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.32263107476456176, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.49456548094666447, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.368476710110977, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.45696515917745634, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.002730707494799792, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.2931120472907597, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.4496606373231774, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.23892317781280908, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.38939729384213495, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.3481041677117235, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.5683248500873576, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.2931120472907597, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.5551374467086605, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.316948318594896, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4848206539921205, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.25029077088688034, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4952931350872444, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.37249688769592515, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.5898027592815512, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.24750028117795922, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.506555887771498, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.2816170941913842, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.4822027054198335, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.15419283939507827, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.3697607442845809, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.21246986343160065, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.41946495258955513, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0935071820875984, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3104146584296793, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.46742290550462806, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.6201011845566285, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.312650266868888, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3779052914818611, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.08852818280995225, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3389067344533533, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.13841356129663587, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.3749458691052568, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.6457225344420294, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.24066493791066654, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4042037876162315, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.198980950307606, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4829816647739418, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.006459948320413436, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.16144218436085053, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.26044492723898, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.127245399039237, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.08385867792924734, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.1131435241082435, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.09051634336084007, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.0821783833049982, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.09398807997403576, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.21457149286802543, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.2929404812443863, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.11152889019962353, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.09318017229533021, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.1130888847477738, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.08874288018896902, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.3955646912745367, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.5363251034746059, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.26460159523593296, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4803700055675181, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.12561164303321054, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.47038542160135094, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.2579623389364088, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.5445453384696608, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.21367689804137183, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.41730988147513415, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.1258504577643309, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4394273483551192, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.29217369652859104, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.5438967020554117, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.5060596870227302, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.6440208597098943, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.1768429537093963, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.45965854198421413, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.008135271427712344, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.22821898087604608, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4078372564340451, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.12748893606628386, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3557730263992019, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.4174381195316011, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.5467874115748124, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.18187597339521155, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.47371700399657607, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.16537176735666792, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.377850748745802, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.10434737222357285, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.3018898621064535, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.4538018546822346, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.28525161483357986, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.523502554802427, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.39141851975461767, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.535715042850057, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.19736837482852188, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.33390746942953115, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.12309701217461798, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.21012928594872415, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.18399961762411743, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.24762685010055863, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.380770036603192, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.2372622545962587, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.41017604192890195, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.10941964146797248, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.26824334216983386, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.24960845712450644, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.1972799636982706, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.31568280470164284, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.47374051221492314, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.2511257790821319, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.17669429015388666, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.4466982237235162, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.028259642508149857, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.25363469927171417, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.15798724534926178, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.09458362068147118, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.19112709920459806, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.26917712505386043, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2839892385105806, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2145552140989996, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.10176289493427026, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.1958289008987353, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.22495130162065566, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.4290284967042129, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.14913041249319167, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.40440131346361496, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.049444999923640494, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.13746950527152393, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.025140789606938285, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.07209117403380154, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.19964955525867845, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.11450137919698138, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.38588319985262204, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2529258575884984, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.43636064973584276, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.2206598690689583, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.4671100153557676, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.233464833213293, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.4707402347794446, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.17712030567063494, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.39640748418199956, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.19528111792372993, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.33640339785796086, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.29628055533074565, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5608629839236193, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.3738380800497378, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.5426797573029211, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.23519540953587326, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.43302228498495166, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.17815505223677908, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.08675107600029897, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.31559446372647004, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.3284807347106534, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.19276506991327308, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.43458455506290555, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.28127656729267564, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.48992770341073005, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.17027553201166185, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4357050316091199, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.2843469008409722, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.2316846840057159, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.14646977906722897, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4187706995483084, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.26460159523593296, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.46213112933303646, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.3104946263254643, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.5229367568544043, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.11661990462712164, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.09360571337807427, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.31554500596740015, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.2584188371476316, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.37301467093894103, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.4094746713677566, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.5127629442173787, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.1978740176644931, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.3475150762120378, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.1652327470881162, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.36089199879242, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.27241531995898394, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2301408248787551, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.27399342653607417, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.4466058864179787, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.3391824705480895, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.4674445814701926, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.2623653043059387, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.39491069838469317, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.03595864795288658, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0983790525743442, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.28375777588367945, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.12525435825547931, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.2209612814301191, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.11872519045542135, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.17676570487933688, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.12829442435801275, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.1254577221421089, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.35339573017922915, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.5732827403734609, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.13551424610107243, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.07442538499398103, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.11642018526341193, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.12864673270139967, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.1415901806866318, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.12467595443249284, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.2225814067024197, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.1386200663966604, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.21110981509198895, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.33555397124550274, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.12977351408339674, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.07868103002431528, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.16676572906614484, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.32290710008434653, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.17085130234342075, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.16065348926853595, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.08308045537204375, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.16672381607546233, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.1507724411705623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.3882065230979266, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.5170531810333934, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.27850162207652013, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.4893867900242687, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.21492809973044594, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.4460691618745447, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.18709023221104107, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.18510871554566785, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.2971880635965443, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.2105995600117642, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3893190034495383, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.40665943808361543, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.5389281530404847, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.20298407172594948, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3472991039100052, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.02525776964585973, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.25984404704136116, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.2686424829558855, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.3998516011592878, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.7037873295747725, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.7990268043083656, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.36484904083194636, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.5860311918255953, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.36429452903417536, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.5861802967934782, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.20871905976580057, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.5638397428015866, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.3020264362557517, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.5242629551035288, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5690431244531307, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.72940147126271, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5035531794651068, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.6494392775548152, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.5737074989070577, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.686909653107697, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.21153598996567438, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.19139378056089273, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.3315891109577817, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.14220602298368126, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.39434634170155597, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.17627564495170006, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2829970218265084, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.1526113403954924, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.15819017856679035, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.40022902080643746, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2407047600997767, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.1519625654965485, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.22790337558585338, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.4825278786131802, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.22171131219306292, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.36739932894360794, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.27896316893887296, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.11404966072004964, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.22630395484390933, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.32904884841000887, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.24044159635524803, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4467052322987113, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.4269094075482239, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.6440338986542221, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2089934379295256, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4202179280810887, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2218554028345633, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.47222888132317115, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.20696541555694187, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4196178256392328, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.39600415644017756, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.31568280470164284, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.47331489037280405, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.28948458010419736, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.5081976030749167, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2782683483892406, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.43863270603769017, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.025462287071061632, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.2730653180659879, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.23927776234174902, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.42492275577244626, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.4192565023565104, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.47988160867636526, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.2825074232826454, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.4576805072760733, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.28387021048806443, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.45495119911742776, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.1445102529736995, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3613780570670241, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.4916952221580248, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5709218171628765, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.3167634335910152, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5181127153993833, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.42682382196874513, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5110919086972547, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3587966530826893, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.44358913447937065, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.5169126503115126, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.2356445175576872, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.33813801354578843, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.4546259941134185, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.24073557586211028, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.42665885117139607, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.37356506757316704, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5184539879286838, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.26105773506180324, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.4938667045198544, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.2909854083755658, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.48665440760963286, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.11900012437357166, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.4188188601039167, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.25411310952997296, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.4590241344003793, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.31612195725801134, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.5514235251355991, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.2618674380230805, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.4556417466102314, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.2415484256116827, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.479721997599474, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.130697570216376, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3179818747026243, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.17080052973495516, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3276453442491313, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.29977450507928116, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.33448899741633614, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.4659921217718883, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.08369850049073722, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2901642042976994, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.15444910001912215, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.3602646769850153, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.08449327708626451, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2894243585178823, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.15293885404881336, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2731842677262201, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.20498415630763028, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.39790541041372224, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.21856390681335636, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.48001348316701725, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.08071364532479955, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2574794098875714, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.3001327916132617, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.4410796277431629, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.010212667284259833, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.08709145169981267, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2435168172078968, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.15901023657267704, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.08048862002869049, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.1853605878762701, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.14911046740590622, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.15103059042446493, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.1571296183656964, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.12287579518346711, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3025336571585583, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.16684195647378827, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.4434377597535756, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.2581209976452507, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.03686480365068629, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.05917561307723289, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.302500899543063, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.11298984129215384, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.1968483491328458, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.31528213773035774, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.19951297936500814, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.3268993135281582, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.17837945138789355, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.25562737560272863, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.1403229830949613, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.12487405142186059, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.17100054419705868, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.48852222717068033, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.6719348281406861, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.3550594707678603, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.4477620872010772, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.16748366556857588, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.24705598637146278, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.06427240011492986, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.1693981615914476, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.23465398368701498, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.45367638954943146, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.19472361650315084, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.40377628308461305, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.36966653028794927, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.619000557599158, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.38110389492916247, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.6234623518191841, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.1919704825961155, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.48258076456494625, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.2773507370781619, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.5159249195742764, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.44317342695520384, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.631619947257763, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.5099592281930094, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.6072298627909279, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.4631700687380434, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.5030365960736705, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.009897295481178127, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.34260821758540483, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.5279077479961488, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.14978817967886865, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.3579627976703376, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3428955163829333, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4549331138881434, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.19374128629783371, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.42300747804792893, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.2319904737965691, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4289093241727338, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.32950396894850414, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.15658555631153465, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4282436178753299, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.3080840787435305, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.5213726866484168, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.26970223719007375, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.49889423329457655, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.22688173051219204, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4245560010070955, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.038798017720540624, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.20189070954084243, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.4058343737951635, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.19196608410425278, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.4492698041709823, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.529528271776728, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.1768429537093963, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.39729894405642474, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.35462291728498596, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.5146453792122839, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.2548688930100782, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.17503462388587468, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.44997796125841427, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.6182809391974339, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.3911045875580893, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.5649289943680694, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.3074421255707261, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.5565613982556684, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.17327326423126865, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.25365021154352885, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.44903119868153707, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.1783827232160263, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.420993901006138, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.5471998982127312, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.7261143787285806, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.28098066731217336, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.5947928282659879, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.2875226333184278, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.588006186288413, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.209915242028325, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.458928494199136, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.18822842483525964, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.5517982259751538, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.728463219390527, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.41220068332005494, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.5170083079058972, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.2954601793007412, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.5471466168379375, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.05365191963585759, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.08673245635389941, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.26477349066797173, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.3471867713780383, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.43403373030619585, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.33731180652769377, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.43284789974371823, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.25107542027773755, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.4717531424710489, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.3060434879538489, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.5193433772960362, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.11350052389636105, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.37168631723243695, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.25107542027773755, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.40205887773726906, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.43242099485313257, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.6199378105417234, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.437242133308854, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.5933352820464992, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.6182013682539468, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.7316707888516918, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.010138118025021337, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.49661681529787766, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.12765417324195955, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.10215316399947212, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.11108510046198607, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.095671247915619, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.11762645023115613, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.1192788515122758, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.07197439647670131, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.11667693485554276, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.08933914980645931, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.11553650371823457, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.11422824640116801, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.22669018884314224, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.44041899547143254, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.1701094120440135, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.29286151337006916, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.2927933973558131, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.47375496223846336, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.23576035344764779, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4410130386349546, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.12806342795779524, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.11867161308842614, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.2993245195993835, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4935525272820318, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.3174643312173473, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4184322271006407, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.12737854791303035, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.13241068483589094, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.08640648924627486, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.15521968858995389, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.22688972701544685, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.06874614919231697, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.20986932076566409, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.1441116002025322, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.23046678878134805, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.18415822638260726, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4239216335535842, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.1648232576186581, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3767452176195591, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.15481642632105738, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.12441731199858395, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3059238664644299, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.30255502678985613, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4951608503180153, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.22364174996640396, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4274071156780137, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.39386077108352097, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5087613640815436, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.22727859775180315, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.1311928332391091, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.26532814879858924, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.2916607130801699, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.530090321409246, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.5344974294085829, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6840734337993466, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.24006100607956476, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.525627718929817, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.2138189462062278, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.5026986017430701, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.18590033912422976, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.5277132126206505, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.27358384246243783, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.4269383120062787, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.4241307927670055, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.6124845435071574, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.26035035646973526, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.46778616416931385, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.2924332807580828, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.5630086380298369, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.10746287309590724, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.14373108296174128, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.3775704790319748, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.3180137236294719, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.485644095022506, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.5954238368250169, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.3762634236591465, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.2746536544630471, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.5097703431719256, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.19953096796729, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.41765409316042634, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.3132259424523443, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.486985188451814, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.3931991982536581, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.5785532289502031, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.3015379177292923, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.47219984621571265, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.45115893481205593, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.573184490403203, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.03030681702844562, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0911209149063426, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.287607153713456, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.33384668064796064, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.4509736001471859, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.27392978689521524, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.4257914116040595, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.17712030567063494, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3607125350126197, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.20588295924038905, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3896356890700865, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.20326644685906695, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.4500858289192976, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.1701094120440135, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.27393234790621707, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.29187438961211887, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.43018985278558913, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.3163068724060186, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.46966390516937145, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.22900275839396275, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3639427717471459, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.02133189360351615, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.232738415750697, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.4383057556966281, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.20135763034646928, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.35006080282921004, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.37565846334638286, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.45365001576190844, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.25087428990920285, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.4056641749589937, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.19984280537514973, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.3848167376504214, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.20551293694553077, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.39212627739793066, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.3527900076150821, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.4631149439498215, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.29217369652859104, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.42254734634105184, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.3824323271187023, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.441028373714331, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.43683636444615426, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.4737804899251307, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.15471749140281507, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1511256708696252, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.32068800761395755, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.1529665009156386, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.15471428129658021, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.23308330456083767, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.19252575667755267, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.19291730665464807, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.05506209809653264, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.16401345929505193, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.15744611445105594, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.09762877195286271, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.29743205258174543, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.20630474086364167, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.3136206747907053, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.045871816440760785, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.13296075981357097, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.06757329102794982, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.10260020371628428, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.19591271282120945, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.18545347920261077, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.3552926519343793, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.5830807494515953, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.2440425026082214, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4841963273421365, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.2665173445221397, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4773652521033466, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.19370500984280545, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.46082177091138216, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.1339286445060473, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.40284578235338797, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.6594216880289406, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.4306385337550844, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.571178776405641, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.3972710678215965, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.5660601276423057, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.1271635774096859, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.10975134114072839, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.12514106545496687, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.07142565875275515, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.156820371591221, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.34038446123808824, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.5113782796801761, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.304657077674182, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.13440935424304148, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.07372315572460936, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.15220003737840038, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.2646502686401735, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.3670756593207092, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.25837130980300244, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.2908618876976738, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.16178785287683076, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.09811660434035342, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.1246356401043489, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.16340836420369564, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.2758741093443254, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.1423117641326302, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.09621288561909111, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.19434864892148712, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.15449597020263703, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.1548405622078228, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.15523404519916673, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.09802862511942351, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.16839811363216226, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.07197439647670131, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.21806094673608334, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.08157532940354172, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.10648920758550386, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.2161011617270055, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.006107696271654331, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.38082873348976415, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5493718467604206, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.36481383830535447, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5676950665117104, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.3021375397356768, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5402514274709564, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.2879557238941732, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4713401374151406, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.19280597289814436, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4994355439174767, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.3020028072429882, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5086556742243997, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.4114081637091976, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5087711002829843, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.4333488014632613, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.5965589158023283, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.02862331475958317, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.2639691450290325, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.19951297936500814, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.30595434646385716, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.3491726680217181, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.49174493737704345, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.20326213373677707, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3965155400704921, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.24460949326708067, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.38733092970267924, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.15491814189192113, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.08792400740884215, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.17948702635267894, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.20904996083879818, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.43827728785185827, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.3375804740497263, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.4148119294349798, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3014454929306372, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.11817024510657297, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.15384750052099497, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.2740074272472142, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.14220836651767108, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.19783691883509516, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.35420217205091864, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.14287380394182542, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.23836122054487227, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.1416144676550763, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.19125133772985436, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.28829924061405854, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.2539342198718324, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.44052227552801354, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.12360545410216946, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.27606723250380166, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.09371774386674346, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.25645319708641284, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.024496873668008953, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.13731102930446024, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.23813185482579471, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.14713433937849357, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.23515811334479123, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.41733625901113247, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2550611509722341, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.12119462149205885, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.253313434440015, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.14716520874694558, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.13736938910605517, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.08898219182045804, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.24022591386861764, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.08044369277849628, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.33820654616065665, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.17525088821946255, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.34470995240873203, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.07526061966136668, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.12790226901170157, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.36821398145189993, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.6015510626637584, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.22008558571360354, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4407629091157834, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4292296939530436, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.153457516394788, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4779432111086399, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.42938082279138273, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.36720232244744416, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.18421227895863315, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4841569295186865, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.17659857710927052, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.43901708743119683, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.16251572062938138, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.504137792563395, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.09294802312471048, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.10039894744886725, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.3531607188042858, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.1349922772392652, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.1381958549483014, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.26422743815167654, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.11714319535206957, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.11346446511593337, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.15991482183163408, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.1305157276666975, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.1059786102229136, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.15586951077565686, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.31800940138537775, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.5024549186437189, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.10290348648040436, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.1604076716890132, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.11294558347159712, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.1407490279587376, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.11282878483968255, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.16649202141995997, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18559542135951204, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3804842882867387, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13087682931309413, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19462952976787054, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.013538497707846785, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16678872216161894, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38156158663679846, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15471428129658016, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4580211317461481, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.21940429389247643, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4343280866601455, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3300025916068812, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5052501972629104, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1824401863423467, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36709433185688595, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3377854698776805, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.521201229892482, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15122189206102096, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.26750110507308866, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.07843772989359644, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1324578891826276, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.08163977068875294, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14134641571854575, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22948919855739472, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11809057094812304, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.27930342777387007, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1475503033983142, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22104108935973044, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16434349396840395, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28582614857210975, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.24911274612875411, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3603818786794888, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.23831215045289575, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17543744527808774, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28201016956553354, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30327872414714485, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.49804213541579834, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.21685485833927476, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3714219747170047, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.366137273378509, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.12656494026948834, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3156355830822428, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.13237645860785527, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3818322535970043, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4251675822745958, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.18154235663145316, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3906877817743504, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.12487405142186064, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.32817291858267583, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.16701570871784516, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4021286881032558, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.29383139922210444, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.15799783604363904, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3949243937510492, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.33752742535974617, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.112289032173749, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.17726100052085036, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.14276716121505195, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3191375424862687, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.31758120882708796, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.36577446688152704, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.33753843688529356, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3593717322097392, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.010176705289341573, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.13628770358024436, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3124983184732695, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.39962545473912425, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3710595252626966, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.18235247300784824, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.40779523977234755, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.013915288440632284, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.39112369376374106, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.45117912182963626, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2202248274013358, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.48474965676300186, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.45813938111627356, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.1593344703029041, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.22494952618128455, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4760660341798742, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.14326513489612383, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4034278533385552, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.12666372160329223, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.2650373529479294, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1649662542496744, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3466546857451185, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.01536966738773372, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.13829446068705525, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.32059338352121075, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.12319017561119809, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.38132934354408266, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.16521691795932783, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4134512022176617, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3435867188688158, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.12366644075037489, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.37651342775995167, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.2962222000049211, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1971903602140518, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.36269646528997446, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.18294117097472648, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4383387744769579, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.2922087191170089, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.2244748716483542, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.007281906895508523, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.17092467746295725, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4340281226634826, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.3969902065412634, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.22860414459682069, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.47331131010100724, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.17200673466668953, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.39948318545775324, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.10553225565626573, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.1763116500850642, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.17730543118229922, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4201842844735916, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.20298407172594946, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.427376330935813, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.1740044679403827, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.36375152376157177, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.13269353024089545, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.28998089836851504, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.01357525601063516, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.382987159925022, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.23159459211256597, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4615951366251923, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.17558199612672082, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.41334979014850587, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.20947801521367798, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.37699245483283905, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.24318848592140954, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.501343318078065, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.13784906211485343, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3161105981607342, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.15626231814206226, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.2918712789926548, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.322788951728102, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.40263021320001785, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1405026510197826, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.24785258181936404, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.15325316503089068, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.2756316951639811, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.27021732648475527, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.3720094104315791, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.3210853623565359, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.009559007108143848, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.05937666456658802, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.28306950244125495, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.14063630555225284, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.24531520458611372, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3264287329357334, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.41662443172249786, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.21397099133614067, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3568171392601981, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.16925466459550803, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.35912398848424326, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.21547697432588886, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.18039960295364865, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.14134641571854575, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3078571099929154, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3117564667581329, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.15658994837053716, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3084004707364603, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.20215771603666896, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.34483322672745376, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.16165057948216605, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.35172210628524053, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.012458960343878354, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.20053583653512705, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3585550644386862, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.2298971389591186, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.45764667682340326, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.2327080490816513, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4213315211213489, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.24362353508932386, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.14005830765988142, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.28271314565258726, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.01943377856541192, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.22183437291807073, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4690517750319636, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.38140613622171876, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.15089318423122547, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4354703980715437, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3931382365355541, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5429286385993002, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.007237155276460672, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.22233922818300378, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.46750271079023087, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.2769725060346048, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.2568191876426829, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.28135849152758385, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.13585608692428647, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.24602093467402117, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.014749122939855126, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.22585782564798598, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15795370509162066, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4301348480354066, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.2462954618610128, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.25512324153300714, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.006249447069096045, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.17131793456589922, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.3238973846683935, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.12832055613623328, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.19433944404681203, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.14482189302397735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2913876815877049, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.1217802106941195, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.19314598726036322, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.1361658548186748, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.3295167855876769, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.36936951339109975, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.12819825042984195, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.36515328991507745, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.21743769222637532, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4131100936190792, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.11914562165195522, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.36399317085467314, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.1302352098354987, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.326035134708999, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.37693028676849333, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.14410670132605607, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.24025207593480963, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.16306957103469613, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.14219389639501667, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3550945020345845, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.013501937941345124, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3791177761741048, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3486134995973391, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3284649068899757, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.20988785322505515, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4323069807401831, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3397876134677058, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.009628007582726738, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.20247469739337648, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4418847146430419, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.24942094354139677, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.8827916928185874, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.9278293769424701, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5919743410620021, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.8142101616656354, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.31085126015841524, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6363359373482358, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.6642718379939968, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7768492311706325, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.7660237942267061, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.8523393041110139, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4479129164961325, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.27434065146872866, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4551761513917315, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.28112283847231073, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.2624404628276128, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.2044894275649509, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.1423256407233325, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3890910518336767, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.28253893006668057, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5344527156938984, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.24797984721910182, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4497423075151473, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.31443515194397026, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.47095169791332614, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.15310672854444382, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3863552232164501, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5712128723523814, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.2112174444529806, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.37115876147810895, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.11460384138378832, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.32308861733051, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1308613527030366, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3063146286877558, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15082713742973322, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.32932326410706136, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1327211341271203, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3314509193319989, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15844990886367694, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3420386248472483, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.2692774258928226, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.41794128483626714, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1357521816192783, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.2572585481186862, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.12724240656680139, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.2057386078262138, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.2045516326940124, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3580560102192693, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.11682130307923512, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.12966020590511304, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.27356002971987425, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.12030921204016166, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2935108999290831, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.22974300992320248, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.35766721538849355, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.21931515993565381, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.12066241764747698, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.27545568870085096, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.019283248858266676, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2136628330685448, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.28253893006668057, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4976525036856883, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2130931749764531, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.34386259606696806, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2789141599069488, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2912425895319303, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.44041590401859537, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0994331256564067, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3083809409945523, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.20416354003164872, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.302007087079803, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.20236126962624626, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3258121781111335, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1812045836887171, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.18649703687001343, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1441966459257424, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.10163106686838855, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1449839903475139, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.011839425862120785, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.12217624912667482, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.22607924847614314, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.24641985957917703, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3656201332966237, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.14455493909089934, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.26045772569635717, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.060104699568344466, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.13351234315884475, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.21097478779973527, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1222932912515144, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.27908865215418427, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3015846610603115, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.23870544239673078, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.30389718661514126, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.14288815197601673, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.14957316612525498, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.15892175003851755, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.12238991307064728, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.12217624912667482, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.2594914718095331, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.13237645860785527, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.28860674393595576, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.2741063450190292, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3186528530268389, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.23870544239673078, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.304002814359566, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.11955848575576285, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.24214093597439865, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.30667375521853, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17081061355061614, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3645499017230567, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.26958290276046354, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3771988116643981, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.27675048474641756, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3780460244391623, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18915983539487516, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3233732862049797, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17730543118229922, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3932130355670893, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2792720350291165, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4727106572557922, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.30611912992377904, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5089658823760935, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2350059388724058, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.47180561431754137, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.25666145410765273, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.42824842089739035, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.01218568017760398, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.28487594977117575, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4793282150965233, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3615889761528277, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.2758862937563794, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4673996585329364, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.12503614625842938, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20624064341134082, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.14539971733340926, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3142480613342232, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.013501937941345124, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1307524497557363, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3361579714658665, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.26784884804296605, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4730778159019489, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.2577701686990218, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.44408712841584985, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.30676022061786057, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4541682078728273, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.20889290280122064, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4127473131540302, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20666579426708878, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.2472264820166318, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.44131209510775493, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.20588815727980112, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.41944461991174653, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.28234422994155567, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.40408604199549997, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3368893372278425, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.22475293380632405, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4113282938664548, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.02586907629320447, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.17504630199215807, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.42263957382757394, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2970253066411792, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4937281552804402, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.29668873351523645, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.45439697849389765, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.22475293380632405, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4113282938664548, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3300025916068812, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.48848603918467354, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.07378730454743347, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2147829756231977, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.461889639754688, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.13588969750586194, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3523239132597748, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1781853859048144, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.36865727091511874, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.2961559727627133, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1087256678530004, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.2806568392288235, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.1117144649534104, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.11595071162902998, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.23613460338414927, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3018966700214589, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.19000969221027156, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.47485346348682694, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1826249361348376, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3484922054626185, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.14214337448390021, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3609159299635901, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.12580941330293896, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.25539472441248895, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.32512365186675757, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49575683948811416, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2775686235755007, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.12416350645592025, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.23265120010755289, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.12846497020051437, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2670865602673704, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.12837839907779722, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.27407435258732404, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.01943377856541192, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.28157636825815224, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2957851779920877, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.1380829650365223, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3721998912104682, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.04086511004567092, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2602720291074952, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2181762689207584, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.16091123830242154, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.29683125676353944, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.217554942150074, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4362387654025806, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.18772266185346026, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4516796575038181, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.28252374116432993, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3549531183419122, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11203754340102182, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.24500939878540784, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.028862054978314452, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1457751611852363, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3555305489093387, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.3388147925328834, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5613545455926224, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.2567411255293559, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.43316022307538615, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11084119214562044, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.31189626075375737, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.26845008380756696, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4949662669458603, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.006356553689956574, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.14219389639501667, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.42294374285621605, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3034375834959013, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.10531636385748798, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.15775047351971955, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26128489301072644, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.13693974024600017, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.24848210874138496, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.1967909040251079, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1331440297382392, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.260380807406192, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3015940724773674, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.19814442729413892, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3102637353553794, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.1547905499593561, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1797040059786851, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.31684376069016223, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.18285404868730815, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.30239028036773985, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1789898550500511, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.3097165910502381, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2126837065505244, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.11378142777276677, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.22340791296245502, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.15292856632736312, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.13410639648320277, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2766307218364423, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.28215396128745796, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.42707262506779775, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.34661236387446376, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.460219316901126, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.17615667556197442, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2360342341767823, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1665583359843711, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.3065906311539413, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.15533586874332386, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1629119279942046, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.28005921777118686, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.23292164090728384, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.4807364086898486, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.15742302643532463, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.3220278551038813, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.18889796346849766, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.390828018955539, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.1484131243041233, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.29204141824583923, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.019510108479333106, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.12690518984438146, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.34504023491572783, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.32158597295125274, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.47599808838263624, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.16605519952887438, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.4509175930579823, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.1453056698669881, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.39967038005662203, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.2533911497972673, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "bleu", - "score": 0.3713354961020269, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "translation", - "metric": "chrf", - "score": 0.5550866336796069, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.15404632289830114, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3934040018417114, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.14855426866172083, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4089031318363594, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.1736086198203101, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3517982963278223, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.34932113360659606, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.19118896363692645, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3809795532418233, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3917791418162285, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.39734352171767023, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.5945486848869352, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.2577701686990218, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.4782520457453995, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.20868721961570674, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.428749486637124, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.12718016030558363, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "bleu", - "score": 0.17600429416656618, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "translation", - "metric": "chrf", - "score": 0.3699002945708035, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.15606652450871636, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.38676973597326414, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.346045680932875, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4621083930255766, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.25751650996406256, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4024993164207148, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.2235704325446919, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.15022502107020383, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.35815688949507335, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.404747550284944, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.2569630819965695, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4638113964856152, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.3110277298634108, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4544814134892622, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.1514798524753532, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "bleu", - "score": 0.21085288029061555, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "translation", - "metric": "chrf", - "score": 0.4495966880080192, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.13628770358024436, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.30464291275706445, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.2873180113751827, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3691291664744644, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.21636840076404606, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.19001225225138996, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.15902128868434096, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.056621705833762526, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.20663551397330182, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.13423393480752616, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3299575528823155, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.1342816454725345, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.2947742083176782, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.19968127388777596, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.3882998250788871, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.09083570416152802, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "bleu", - "score": 0.08340582868969061, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "translation", - "metric": "chrf", - "score": 0.24375361043612415, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.29331713922012836, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.18582826054135923, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.27931164611946097, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.13628770358024436, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.196045070105177, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.24882723725145164, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.014111660160258993, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.11825188614600338, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.309191449874187, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.2884821307335442, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.33382277849238146, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.2689798160106529, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.05338400788913371, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "bleu", - "score": 0.15069228960836628, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "translation", - "metric": "chrf", - "score": 0.3780369016923967, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.22256474447332572, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.1087256678530004, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.1812150267056357, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.09624090077172921, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.08196612912062277, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.08546743910655354, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.009628007582726738, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.10539140971370214, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.13307637507950731, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.248301283911442, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.1612982609267219, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.026144042496829667, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.15080392997556943, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.020035925770441693, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "translation", - "metric": "chrf", - "score": 0.14949577610607986, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.27274442393032494, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.4767475272675149, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.08206174754800233, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.24600065227133203, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.2894206730562163, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.130697570216376, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.3655209094420809, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.28364158842913945, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.10704604894593339, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.28394660030720387, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.27434065146872866, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.5335196659354084, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.14868720326332424, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.31851758158862814, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.16574624158406068, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.39903127217688206, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.17441676789914212, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "bleu", - "score": 0.11047111196276078, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "translation", - "metric": "chrf", - "score": 0.34052178710726805, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.32404902054836443, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.13600287923663476, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.296137260965594, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.29939861106519894, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.14523626605098836, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.30140546115451755, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.025375434133933374, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.14023497693876652, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.36380485738859425, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.13821968766350226, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.38195203502692965, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.14062598436731893, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.30895718290882995, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.3379310421768954, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.18983865085357438, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "bleu", - "score": 0.11846592694382017, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "translation", - "metric": "chrf", - "score": 0.3327568989410052, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.2711712970899214, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.12748547320686965, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.2256255768392581, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.13897785762455162, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.23836155147452145, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.013649374730290785, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.22198095012235933, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.1434542641988108, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.36240616778818124, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.12276168155242136, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.33010159216538154, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.33188650052532137, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.15362952183523224, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "translation", - "metric": "chrf", - "score": 0.30296187648016454, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.2418791601714353, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3327258966414523, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.18472476303374016, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3755456818093384, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.16879768238484785, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.25555397978788114, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.09786652404503388, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.049912483769917554, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.25692379453535436, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4546530774152114, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.13836903384315105, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.3041148601474962, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.4258132414030401, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.015512728671064098, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "translation", - "metric": "chrf", - "score": 0.15128322251777712, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.10028126671358768, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.21465229625864304, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.12004125280185217, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.13075268692454742, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0953744984725567, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.12169529780486223, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.012305643362227912, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.03909826185078624, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.14419884809836403, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.30891047545292866, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.1736086198203101, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.26516347846751803, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.050527960640167395, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.15324458304516878, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.07507324760135455, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "translation", - "metric": "chrf", - "score": 0.10491879465581472, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.16340836420369564, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.36283746947775286, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.2012788513843773, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3241416301984527, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.222140444588514, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.29953031595565194, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.17864866390812006, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3351983303537736, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.00778050477663876, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.10228157247360439, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.28364356692412224, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.17781916046116683, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.4033164239257758, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.26660495672254986, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.48379919869596677, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.19694774164152168, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.38832716646104737, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.0942697851358349, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.2909029546052453, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.08096470168539781, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.1842927346134308, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "bleu", - "score": 0.16419136872156925, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "translation", - "metric": "chrf", - "score": 0.3585424355733966, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.17092467746295725, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.36754216836174997, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.2290624375320133, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3703637148185826, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.20316405901471601, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.19469497781111866, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.015417060337592398, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.1686298660722932, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3297355225093461, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3712864745826113, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.15022502107020383, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.25146961869209467, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.1537414828207279, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.26860510051078923, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.08158808350367645, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.32384784747508405, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.13438821320662678, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "bleu", - "score": 0.18398732411042557, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "translation", - "metric": "chrf", - "score": 0.3365595470257617, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.2048509043540121, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.30660769477099914, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.13583266175611605, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.3216755467134329, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.15351923691242442, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.17346101917834872, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.014779059188632846, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.2027129184521292, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.19506658275416644, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.37390084654467903, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.2570154296975664, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.30037434123502954, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.14019283646199668, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "translation", - "metric": "chrf", - "score": 0.29482817900208275, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2625069486168988, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.18398732411042557, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2401204673210609, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2557074827472021, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.11342612980533952, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.20642155523427064, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.1844324702587695, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.19003666214886608, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.2972636640814622, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.25656344973855477, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.052170870839955336, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.20873122779190237, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.11588655450753808, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "translation", - "metric": "chrf", - "score": 0.31184111541609666, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.14645066834461026, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.3063157759715837, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.13937542038981274, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.2484339727687842, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.2737880982480958, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.16027177058640993, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.32787056009967885, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.014191769632389028, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.2990163335159231, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.31381603830277516, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.3537931405693036, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.24806225111003116, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.10334447217760966, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "translation", - "metric": "chrf", - "score": 0.27914163518014634, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4018398428695008, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.13382613080002836, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.2537208483050712, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.2630950732259493, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.31644878211504707, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.01357525601063516, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.32899482453344325, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.37474288032090364, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.19471699714049806, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.4243692249576114, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.3966186698883196, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.3773883829664062, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.16426747109440132, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "translation", - "metric": "chrf", - "score": 0.3305176755869093, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.19834633509680927, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.3614699644212251, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.19000532642952978, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2405617210713385, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.22684236479857312, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.12605968092174913, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2981369001931117, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.01943377856541192, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.1250305362182298, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.32984658078581847, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.22065986906895835, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.4278272026300076, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.3740469325434541, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.12303973923740176, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.27974292659168287, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.22900275839396275, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.38971013705057145, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.2568248089527478, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "bleu", - "score": 0.23596413141640699, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "translation", - "metric": "chrf", - "score": 0.4460483451210987, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.21010332378415866, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.16049139739945859, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.019878741152560272, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.06349386212511017, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.07947942766247484, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.051273457709972596, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.23111943721577524, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.07507324760135455, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.12104111874038677, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.00966183574879227, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "translation", - "metric": "chrf", - "score": 0.15278097980557423, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.22559518514805962, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.10902248103931993, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.16583300501883477, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.09500430486926614, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.16275460726319185, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.00900384448670719, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.10967147003271659, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.22965648229220062, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.39443611468201295, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.2575778955813508, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.025268949786087753, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.09146177671563739, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.06902846920071223, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "translation", - "metric": "chrf", - "score": 0.28319385484238924, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.2154897801937284, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.4083528236853434, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.1740044679403827, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3783149893116463, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.1258273118584677, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3207394929288255, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.12217624912667482, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.31969612320402657, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.17935682644456008, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3091351691666631, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.14391777351450838, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.44334129048496157, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3628458215058983, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.10090835404165316, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.29517668740514497, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.20194534725070323, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3932742385329565, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.16136227410457432, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "bleu", - "score": 0.09694361543655163, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "translation", - "metric": "chrf", - "score": 0.3440711775946441, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.16574121720327287, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.3831242265731415, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.25011851152889697, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.37126196571844006, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.05671337518059672, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.26251266297831083, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.16778622160516982, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.3951324994252282, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.12032473082025806, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.257107616327564, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.14988552732741944, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.38777884437248195, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.41537893836225864, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.2791853718580844, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.4442614192163653, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.16553354669449483, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.3691421692417742, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.13088135388440164, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "bleu", - "score": 0.16165057948216605, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "translation", - "metric": "chrf", - "score": 0.40481686014626656, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.18125609699008438, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.07021707359312077, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.22277615768791725, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.08568635726825895, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.15015893458515112, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.08071364532479955, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.16483481050649815, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.01399317758408034, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.12032473082025806, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.2284570301856708, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.3056690039624212, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.24959865593445524, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.009321299833410845, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.03544218674244847, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.1649228640173518, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.008521982065882127, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "bleu", - "score": 0.08428682606415534, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "translation", - "metric": "chrf", - "score": 0.18678471555453846, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2511187197601112, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.47819642315416905, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.3506669990311196, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.5110276476843241, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2823806497463373, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4097065111577764, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.36354348777947165, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.16027177058640993, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.38029889329714306, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.2821717364449649, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.46605842048009466, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.23564633388537584, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.447428006095353, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.1947354557370754, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.3243594420480133, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4771341471235553, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.2871055620293988, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "bleu", - "score": 0.28253893006668057, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "translation", - "metric": "chrf", - "score": 0.4809444160915631, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.1537414828207279, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3722050787080825, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.20079789489773447, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.36306781014142475, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.18254530689454584, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.1217802106941195, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.18747753884336388, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.15646461125164918, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.1004877071264788, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.27596073639342855, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3355401995154488, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.13628770358024436, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3424046100869749, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.10146459445560989, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.22506824404168607, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.11873119582007514, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.3049590220162351, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.013010079472105241, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "bleu", - "score": 0.08876856613382532, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "translation", - "metric": "chrf", - "score": 0.22686382638352, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.2572390116372129, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.13966768009198655, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.2813477376997818, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.20326479836901273, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.025565199775551904, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.20649200037444357, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.14868720326332424, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3990908050465795, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.25340620432916805, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.24200501869186217, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.012909307569742633, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "bleu", - "score": 0.14410670132605607, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "translation", - "metric": "chrf", - "score": 0.3440349878645951, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.15014755317658912, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.37836116314741347, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.06225053846006199, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.19628052952282465, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2461113033172792, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.23440058276054598, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2954875428732851, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.12577829595095136, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.2424544420544208, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.28515517056035755, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.18696425994936555, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.46256405623661556, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.26289516212337455, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.4739347975118012, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.30103736170459866, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.4638998374230593, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.0052216413024891595, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "bleu", - "score": 0.20519422155676922, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "translation", - "metric": "chrf", - "score": 0.37142687967287463, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.13600898159884844, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.3230757564905893, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.2235267829375094, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.4197003930616035, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.12017396628208415, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.17861403940933454, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.12307180394105495, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.17008525013313108, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.019510108479333106, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.12017396628208415, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.18853872865175844, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.1477411900250408, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.47448172512052467, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.33941255263446807, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.11458346677361843, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.13803309048601614, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.27926501329820147, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.048472513540084076, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "translation", - "metric": "chrf", - "score": 0.15895910055429568, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.28588995835566733, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.1509901905071242, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.42614589006990955, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.16422774333078005, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.07387254485071908, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.18666450359774303, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.09706088585617588, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.1616203397255244, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.15894780290856192, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.4341048241384398, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.12107046798719928, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.26610410497187936, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.2187290170234865, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.12149201826977803, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "translation", - "metric": "chrf", - "score": 0.21149452047942327, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.19928950404960785, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.3515916946368607, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.47110207134358734, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.12322620396842734, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.2816556920367689, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.31252036326890786, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.11761715910158331, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.255091096526975, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.1303852679364709, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.3864868842126462, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.34706134175419817, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.26059169395816123, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.356639946525953, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.14615116208406398, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.2812535721592, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.16500884451724743, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "bleu", - "score": 0.15864869257759262, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "translation", - "metric": "chrf", - "score": 0.36522352464212327, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.1250305362182298, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.35192066105839037, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.23734991438269704, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.05708041498286245, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.33319628333145845, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.012680136469239416, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.14326513489612383, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.3686286225188453, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.34008001988346953, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.1478206449158964, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.3434941005489122, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.1258273118584677, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.3288942334010695, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.31233654238789915, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.2383335612549986, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "bleu", - "score": 0.1459371948060594, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "translation", - "metric": "chrf", - "score": 0.3585936421373033, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.39208241867588406, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.12671660613804978, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3813787743264216, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.16338968219757316, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.43516585142042474, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.16359043508686386, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3659400820420475, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.14614460466911597, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3030057671331465, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.05027439173128933, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.2354400786909369, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.463705123915972, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.2475303873008388, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.4668999300883758, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.2196760025169154, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.43549997500530213, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.12268576462201722, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "bleu", - "score": 0.21921690700367402, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "translation", - "metric": "chrf", - "score": 0.3655682504876654, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.3141940399992296, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.2175732217739929, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.26934434229495274, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.1965525334564022, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.10163106686838855, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.19592782150484342, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.14463936736378039, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.06695900686562914, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.17379715979609378, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.11318741602028208, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.3385268327723179, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.2521939110082033, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.22946638870010946, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.10322985794794913, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.23137708386333908, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.13877364471405382, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "translation", - "metric": "chrf", - "score": 0.16536256958499396, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.19331968002780792, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.30653379537681946, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.16419136872156925, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.38578825514877557, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.11319316697505612, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3044933526278424, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.2055907593919444, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.11772040354469114, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.26408792460406616, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.1334223706673101, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.32135454277697334, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.21322825633117104, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.46552307123187675, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.26036802768146033, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.40212197517878956, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.22177657695506436, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.362986513681601, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.21076307738524264, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "bleu", - "score": 0.14207405313947058, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "translation", - "metric": "chrf", - "score": 0.3375092428241583, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.10372851412328025, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.05428552151774627, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.08170272572786982, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.054560358307203495, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.08394808675649712, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.013724307289441972, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.031040886211736902, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.031040886211736902, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.030944828051431932, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.020277511772040634, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.07555954862563066, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "translation", - "metric": "chrf", - "score": 0.08035467692935112, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.2197940423573754, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.3252925042873819, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.2386512909161297, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.20056920628012903, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.019208954982955537, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.14965254226465277, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.26352230043195884, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.4800402838260972, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.2744862572324789, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.11146907857325296, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.30553778277241345, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.011981278425892186, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "translation", - "metric": "chrf", - "score": 0.11890083473431896, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.16894127989367852, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.3419460050290285, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.3808157877385458, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4468825428794822, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0810371533925042, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.14661692946967528, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.08644370615638991, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.1799031576653526, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.01399317758408034, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.16961162496305443, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.2560718753815588, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.36565527196849945, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.5772340526881832, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.42571201827765304, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.4882229662112406, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.11350052389636105, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.20482099945558166, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.3757030069486771, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.44130967365135815, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.15641377436989223, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "bleu", - "score": 0.2926394666583954, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "translation", - "metric": "chrf", - "score": 0.36657660679561177, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.14062598436731893, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.3764695982007195, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.26772209592710927, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.16587560311800356, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.36304298665164114, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.27837115847479993, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.015137007844878625, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.130697570216376, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.4014069285003254, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.36149919525841795, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.1584846494016487, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.36323507681554296, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.17853738103790043, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.3932611225012359, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.21824882776994264, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.46610251429617194, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.14482189302397735, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.29373162379324574, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "translation", - "metric": "chrf", - "score": 0.30486946045076013, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.19207278754983098, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.18980024752692398, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.17719555918098742, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.125919848913809, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.12820373892360745, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.12835230243937998, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.17253693310798596, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.013724307289441972, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.125919848913809, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.193779543005732, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.18123509091399645, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.13447737207790966, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.2292142526494232, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.11653181164997596, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.11943156166987552, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "bleu", - "score": 0.18043239916836057, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "translation", - "metric": "chrf", - "score": 0.28954208185359936, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.1968536715007284, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.333542500417417, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.17016486621490087, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3423671585123332, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.20172859170422008, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.1422809818637565, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.013649374730290785, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.13033894166590243, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.32686841499906305, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.2572381952329596, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.40593200139482377, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.1511794025087067, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3320313756459434, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.011615369706513964, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.08244068023641246, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.1459371948060594, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.3420465554679724, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.01361221447158419, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "translation", - "metric": "chrf", - "score": 0.28697920902986146, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.14976409594561182, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.35357199599186406, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.1665583359843711, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.2627459510960287, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.26627996704195217, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.13836903384315108, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.24978646356047463, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.01274477939514699, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.14023497693876652, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.33813739363247586, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.25006681812704, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.4571706387484243, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.16829158981319015, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.43343086093146, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.18889796346849766, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.4769236082569465, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.17382262531034068, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "bleu", - "score": 0.30611912992377904, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "translation", - "metric": "chrf", - "score": 0.4602147817127031, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.20740300708624634, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.11206360906932318, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.11591282390598331, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.10734755849153174, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.1547149164508717, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.014396080136217076, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.08956661266587752, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.13908487697830615, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.22268940623874697, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.18086432478827452, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.11823053204772466, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.2436674848852202, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.05991545180730296, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "translation", - "metric": "chrf", - "score": 0.11204525682954576, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.19393790238357375, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.11424261736422782, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2214641710932888, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2060727232464618, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.015322564973157411, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.16060318251525468, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.4205735776331965, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.36353587005992366, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.1465805099425248, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.2504062375822152, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.08423268420860885, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "translation", - "metric": "chrf", - "score": 0.16575157845273134, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.2354734090463839, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.15803708011407422, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.1534362334139164, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.10180772252352548, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.1504685575799601, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.019510108479333106, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.08419649365121126, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.13894487895537852, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.16029555355562974, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.20725689371393963, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.08700223397019134, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.13038203143994997, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.08565389722143929, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.029825717020904915, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "translation", - "metric": "chrf", - "score": 0.13917503995313457, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.029484944886992947, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.08447773742536654, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.13534893625087907, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.09370499064568831, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.09290214674953907, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.13365574149528514, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.08208989406093385, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.14930624110293342, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.3164816989291774, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.14389998233563306, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.019789621633805, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.015694709781828922, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.0013061650992685476, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "translation", - "metric": "chrf", - "score": 0.03578332662951316, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.41557169141417455, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.2152971203854131, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.45844266102619613, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.3510795373941694, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.12008699630291321, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.3057995109706267, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.014563813791017045, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.39078916716317236, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4424078695089543, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.37940478449513215, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.37844523324950047, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.22192938454540428, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.4722416995824046, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.2997971304127524, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "translation", - "metric": "chrf", - "score": 0.3490555531851794, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.2603954279622387, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.30277029197532107, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.4119930658213665, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.3172363525732528, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.11118250314256345, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.15998889622726925, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.011952636623752582, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.18643810742149283, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.27451118944416963, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.49409921315387106, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.32339783931086485, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.13682541096468273, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.03859789398073438, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.2579829463429384, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.09893019435891957, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "bleu", - "score": 0.09812163258584553, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "translation", - "metric": "chrf", - "score": 0.21604420923574277, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.09824473131578967, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.26161454205511375, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.17813207960290023, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.12613950642084928, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.151940344351269, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.01227543166891452, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.10007559720315146, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.17039219737754951, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.23064197108367845, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.3431872475401807, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.12288275235434755, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.024803681108889294, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.18600740402846921, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.011107583005137327, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "translation", - "metric": "chrf", - "score": 0.08459474168598785, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.1077448900968642, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.1466632434186726, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.19398573687939527, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.11116961409150189, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.14407603400456293, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.09701470884546518, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.13602315844950702, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.012097231620361405, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.09567839473656903, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.1420055095780977, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.1691891767891315, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.3972953748769731, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.12593609141437836, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.2608331698897448, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.11116961409150189, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.1400202211268643, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.1178826285649154, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.27891920549891147, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.017630650669775427, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "translation", - "metric": "chrf", - "score": 0.08279488257497868, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.1535686541317235, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.3273820311085689, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.1479757676110522, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.36882257747840863, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.26565870470756586, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.3292881368564412, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.014735502561072271, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.35840752330309295, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.14270596284245182, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.39124824093913935, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.17335685887215152, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.4144645977372426, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.16446075661451018, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.3554659823158598, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.12795100096585615, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.40513606402908053, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.011700892988098854, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "translation", - "metric": "chrf", - "score": 0.39515912915016366, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.2393868174483411, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.16951713127949472, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.13967106347277614, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.11873119582007514, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.23521660134811131, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.003917516359736889, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.14618317074862378, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.203586302029077, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.08382475612465994, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.21886462133645654, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.1227835638933406, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.09628098247644358, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.04911913163341779, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "translation", - "metric": "chrf", - "score": 0.13078905745700525, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bho", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "nl", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", + "bcp_47": "ja", "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -218150,5071 +85307,5047 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", @@ -223222,15 +90355,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -223238,7567 +90371,7567 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mai", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mai", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mai", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { - "model": "google/gemini-2.0-flash-001", + "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "te", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "te", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "te", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "te", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mr", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mr", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "jv", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "jv", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "jv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "jv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "vi", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "vi", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "vi", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "vi", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "vi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "vi", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "vi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ta", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ta", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ta", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ta", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ta", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ta", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ta", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ta", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fa", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fa", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fa", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fa", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "tr", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "tr", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "tr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "tr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "tr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "tr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "tr", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "tr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yue", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yue", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yue", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yue", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yue", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yue", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yue", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yue", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yue", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yue", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yue", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ko", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ko", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ko", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ko", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ko", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ko", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ko", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ko", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ko", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ko", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ko", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "it", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "it", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "it", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "it", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "it", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "it", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "it", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "it", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "it", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "it", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "it", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fil", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fil", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fil", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fil", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fil", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fil", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fil", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fil", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fil", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fil", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fil", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "arz", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "arz", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "arz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "arz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "arz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "arz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", @@ -230806,15 +97939,15 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -230822,2487 +97955,2511 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "gu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "th", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "th", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "th", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "th", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "th", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "th", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "th", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "th", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "th", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "th", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "th", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "th", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kn", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kn", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kn", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kn", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kn", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kn", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ml", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ml", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ml", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ml", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ml", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ml", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ml", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ml", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ml", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ml", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ml", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ml", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "or", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "or", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "or", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "or", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "or", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "or", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "or", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "or", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "or", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "or", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "or", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "or", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pl", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pl", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pl", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pl", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "pl", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pl", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ha", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ha", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ha", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ha", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ha", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ha", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ha", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ha", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ha", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ha", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ha", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ha", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sd", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sd", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sd", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sd", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sd", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sd", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sd", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sd", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sd", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sd", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sd", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sd", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ms", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ms", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ms", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ms", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ms", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ms", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "my", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "my", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "my", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "my", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "my", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "my", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "my", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "my", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "my", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "my", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "my", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "my", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "am", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "am", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "am", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "am", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "am", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "am", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "am", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "am", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "am", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "am", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "am", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "am", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "om", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "om", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "om", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "om", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "om", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "om", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "om", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "om", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "om", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "om", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "om", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "om", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bho", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bho", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bho", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bho", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bho", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bho", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "bho", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bho", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bho", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bho", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bho", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bho", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uz", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uz", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uz", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uz", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uz", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uz", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uz", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uz", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "az", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "az", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "az", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "az", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "az", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "az", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "az", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "az", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "az", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "az", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "az", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "az", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "su", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "su", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "su", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "su", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "su", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "su", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "su", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "su", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "su", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "su", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "su", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "su", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "nl", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "nl", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "nl", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "nl", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "nl", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "nl", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ary", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ary", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ary", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ary", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ary", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ary", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "uk", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "uk", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "uk", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "uk", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "uk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "uk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "yo", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "yo", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yo", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "yo", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "yo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "yo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ig", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ig", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ig", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ig", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ig", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ig", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ceb", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ceb", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ceb", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ceb", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ceb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ceb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "awa", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "awa", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "awa", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "awa", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "awa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "awa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mg", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mg", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mg", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mg", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mg", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mg", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ro", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ro", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ro", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ro", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ro", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ro", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ne", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ne", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ne", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ne", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -233310,659 +100467,683 @@ "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mai", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mai", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mai", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "as", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "as", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "as", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "as", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "as", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "as", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ny", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ny", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ny", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ny", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ny", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ny", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "so", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "so", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "so", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "so", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "so", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "so", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "mag", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "mag", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mag", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "mag", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "mag", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "mag", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sr", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sr", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sr", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sr", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "si", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "si", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "si", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "si", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "si", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "si", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, @@ -233970,7 +101151,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "km", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, @@ -233978,7 +101159,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "km", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, @@ -233986,87 +101167,87 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "km", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "km", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "km", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "km", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hne", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, @@ -234074,7 +101255,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hne", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, @@ -234082,79 +101263,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hne", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hne", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hne", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hne", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, @@ -234162,7 +101343,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fuv", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, @@ -234170,7 +101351,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fuv", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, @@ -234178,79 +101359,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fuv", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fuv", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fuv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fuv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, @@ -234258,7 +101439,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zu", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, @@ -234266,7 +101447,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zu", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, @@ -234274,79 +101455,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zu", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zu", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, @@ -234354,7 +101535,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "kk", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, @@ -234362,87 +101543,87 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "kk", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "kk", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "kk", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "kk", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "kk", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, @@ -234450,7 +101631,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "cs", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, @@ -234458,7 +101639,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "cs", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, @@ -234466,79 +101647,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "cs", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "cs", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "cs", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "cs", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, @@ -234546,7 +101727,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sv", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, @@ -234554,7 +101735,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sv", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, @@ -234562,79 +101743,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sv", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sv", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sv", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, @@ -234642,7 +101823,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hu", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, @@ -234650,87 +101831,87 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hu", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hu", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hu", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hu", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, @@ -234738,7 +101919,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "el", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, @@ -234746,95 +101927,95 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "el", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "el", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "el", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "el", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "el", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sn", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, @@ -234842,7 +102023,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sn", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, @@ -234850,79 +102031,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sn", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sn", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, @@ -234930,7 +102111,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ckb", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, @@ -234938,7 +102119,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ckb", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 1, @@ -234946,79 +102127,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ckb", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ckb", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ckb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ckb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, @@ -235026,7 +102207,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "rw", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, @@ -235034,7 +102215,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "rw", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, @@ -235042,79 +102223,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "rw", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "rw", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "rw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "rw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, @@ -235122,7 +102303,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "wo", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, @@ -235130,7 +102311,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "wo", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, @@ -235138,79 +102319,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "wo", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "wo", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "wo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "wo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, @@ -235218,7 +102399,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "aeb", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, @@ -235226,7 +102407,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "aeb", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, @@ -235234,79 +102415,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "aeb", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "aeb", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "aeb", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "aeb", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, @@ -235314,7 +102495,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ilo", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, @@ -235322,87 +102503,87 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ilo", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ilo", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ilo", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ilo", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ilo", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, @@ -235410,7 +102591,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "xh", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, @@ -235418,87 +102599,87 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "xh", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "xh", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "xh", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "xh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "xh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, @@ -235506,7 +102687,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ti", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, @@ -235514,87 +102695,87 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ti", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ti", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ti", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ti", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ti", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, @@ -235602,7 +102783,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "be", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, @@ -235610,7 +102791,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "be", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, @@ -235618,79 +102799,79 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "be", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "be", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "be", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "be", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, @@ -235698,7 +102879,7 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "lua", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, @@ -235706,86 +102887,86 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "lua", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "lua", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "lua", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "lua", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "lua", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "lua", "task": "classification", "metric": "accuracy",